mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #61749 from ClickHouse/pufit/volnitsky-assert-fix
Fix crash in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8
This commit is contained in:
commit
cd3edf3f5b
@ -191,7 +191,8 @@ namespace VolnitskyTraits
|
||||
if (length_l != length_r)
|
||||
return false;
|
||||
|
||||
assert(length_l >= 2 && length_r >= 2);
|
||||
if (length_l < 2 || length_r < 2)
|
||||
return false; /// Some part of the given ngram contains an invalid UTF-8 sequence.
|
||||
|
||||
chars.c0 = seq_l[seq_ngram_offset];
|
||||
chars.c1 = seq_l[seq_ngram_offset + 1];
|
||||
@ -253,7 +254,9 @@ namespace VolnitskyTraits
|
||||
if (size_l != size_u)
|
||||
return false;
|
||||
|
||||
assert(size_l >= 1 && size_u >= 1);
|
||||
if (size_l == 0 || size_u == 0)
|
||||
return false; /// Some part of the given ngram contains an invalid UTF-8 sequence.
|
||||
|
||||
chars.c1 = seq_l[0];
|
||||
putNGramBase(n, offset);
|
||||
|
||||
@ -276,7 +279,8 @@ namespace VolnitskyTraits
|
||||
if (size_l != size_u)
|
||||
return false;
|
||||
|
||||
assert(size_l > seq_ngram_offset && size_u > seq_ngram_offset);
|
||||
if (size_l <= seq_ngram_offset || size_u <= seq_ngram_offset)
|
||||
return false; /// Some part of the given ngram contains an invalid UTF-8 sequence.
|
||||
|
||||
chars.c0 = seq_l[seq_ngram_offset];
|
||||
putNGramBase(n, offset);
|
||||
@ -302,10 +306,8 @@ namespace VolnitskyTraits
|
||||
if (size_first_l != size_first_u || size_second_l != size_second_u)
|
||||
return false;
|
||||
|
||||
assert(size_first_l > seq_ngram_offset);
|
||||
assert(size_first_u > seq_ngram_offset);
|
||||
assert(size_second_l > 0);
|
||||
assert(size_second_u > 0);
|
||||
if (size_first_l <= seq_ngram_offset || size_first_u <= seq_ngram_offset || size_second_l == 0 || size_second_u == 0)
|
||||
return false;
|
||||
|
||||
auto c0l = first_l_seq[seq_ngram_offset];
|
||||
auto c0u = first_u_seq[seq_ngram_offset];
|
||||
@ -399,7 +401,7 @@ public:
|
||||
if (fallback || fallback_searcher.force_fallback)
|
||||
return;
|
||||
|
||||
hash = std::unique_ptr<VolnitskyTraits::Offset[]>(new VolnitskyTraits::Offset[VolnitskyTraits::hash_size]{});
|
||||
hash = std::make_unique<VolnitskyTraits::Offset[]>(VolnitskyTraits::hash_size);
|
||||
|
||||
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
|
||||
/// ssize_t is used here because an unsigned index cannot be used with a condition like `i >= 0`: an unsigned value is always >= 0.
|
||||
|
@ -12872,3 +12872,4 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -223,6 +223,8 @@ select [2] = multiSearchAllPositions(materialize('abab'), materialize(['ba']));
|
||||
select [1] = multiSearchAllPositionsCaseInsensitive(materialize('aBaB'), materialize(['abab']));
|
||||
select [3] = multiSearchAllPositionsUTF8(materialize('ab€ab'), materialize(['€']));
|
||||
select [3] = multiSearchAllPositionsCaseInsensitiveUTF8(materialize('ab€AB'), materialize(['€ab']));
|
||||
-- Checks that a broken UTF-8 sequence is handled correctly.
|
||||
select [0] = multiSearchAllPositionsCaseInsensitiveUTF8(materialize(''), materialize(['a\x90\x90\x90\x90\x90\x90']));
|
||||
|
||||
select 1 = multiSearchAny(materialize('abcdefgh'), ['b']);
|
||||
select 1 = multiSearchAny(materialize('abcdefgh'), ['bc']);
|
||||
|
Loading…
Reference in New Issue
Block a user