Don't crash in functions multiMatch[Fuzzy](AllIndices/Any/AnyIndex)() with an empty needle array

Queries that pass an empty array of needles, such as
  "select multiMatchAnyIndex('abc', []::Array(String))"
were not handled properly and crashed.
This commit is contained in:
Robert Schulze 2022-07-08 11:18:53 +00:00
parent af1136c990
commit 8e1a3cd194
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 71 additions and 0 deletions

View File

@ -80,6 +80,12 @@ struct MultiMatchAllIndicesImpl
offsets.resize(haystack_offsets.size());
if (needles_arr.empty())
{
std::fill(offsets.begin(), offsets.end(), 0);
return;
}
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
@ -181,6 +187,14 @@ struct MultiMatchAllIndicesImpl
needles.emplace_back(needles_data_string->getDataAt(j).toView());
}
if (needles.empty())
{
offsets[i] = (i == 0) ? 0 : offsets[i-1];
prev_haystack_offset = haystack_offsets[i];
prev_needles_offset = needles_offsets[i];
continue;
}
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);

View File

@ -93,6 +93,12 @@ struct MultiMatchAnyImpl
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
res.resize(haystack_offsets.size());
if (needles_arr.empty())
{
std::fill(res.begin(), res.end(), 0);
return;
}
#if USE_VECTORSCAN
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
@ -208,6 +214,14 @@ struct MultiMatchAnyImpl
needles.emplace_back(needles_data_string->getDataAt(j).toView());
}
if (needles.empty())
{
res[i] = 0;
prev_haystack_offset = haystack_offsets[i];
prev_needles_offset = needles_offsets[i];
continue;
}
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
@ -288,6 +302,13 @@ struct MultiMatchAnyImpl
needles.emplace_back(needles_data_string->getDataAt(j).toView());
}
if (needles.empty())
{
prev_haystack_offset = haystack_offsets[i];
prev_needles_offset = needles_offsets[i];
continue;
}
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
for (size_t j = 0; j < needles.size(); ++j)

View File

@ -601,6 +601,21 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
@ -1227,6 +1242,21 @@ All tests above must return 1, all tests below return something.
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0

View File

@ -29,6 +29,7 @@ select 1 = multiMatchAny(materialize(''), ['']);
select 0 = multiMatchAny(materialize(''), ['some string']);
select 1 = multiMatchAny(materialize('abc'), ['']);
select 1 = multiMatchAny(materialize('abc'), ['']) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), []::Array(String)) from system.numbers limit 5;
select 0 = multiMatchAny(materialize('abc'), ['defgh']);
select 0 = multiMatchAny(materialize('abc'), ['defg']);
@ -77,6 +78,7 @@ select 1 = multiMatchAny(materialize('abcdef'), ['a......', 'a.....']) from syst
select 0 = multiMatchAny(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), ['a\0d']) from system.numbers limit 10;
select 0 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), []::Array(String)) from system.numbers limit 5;
select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google', 'unian1']) from system.numbers limit 10;
select 2 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google1', 'unian']) from system.numbers limit 10;
select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10;
@ -86,6 +88,7 @@ SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/',
SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']);
-- All indices tests
SELECT [] = multiMatchAllIndices(materialize('Butterbrot!'), []::Array(String)) from system.numbers limit 5;
SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5;
SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5;
SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5;
@ -124,6 +127,7 @@ select 1 = multiMatchAny(materialize(''), materialize(['']));
select 0 = multiMatchAny(materialize(''), materialize(['some string']));
select 1 = multiMatchAny(materialize('abc'), materialize(['']));
select 1 = multiMatchAny(materialize('abc'), materialize([''])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize([]::Array(String))) from system.numbers limit 5;
select 0 = multiMatchAny(materialize('abc'), materialize(['defgh']));
select 0 = multiMatchAny(materialize('abc'), materialize(['defg']));
@ -172,6 +176,7 @@ select 1 = multiMatchAny(materialize('abcdef'), materialize(['a......', 'a.....'
select 0 = multiMatchAny(materialize('aaaa'), materialize(['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['a\0d'])) from system.numbers limit 10;
select 0 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize([]::Array(String))) from system.numbers limit 5;
select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['google', 'unian1'])) from system.numbers limit 10;
select 2 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['google1', 'unian'])) from system.numbers limit 10;
select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', '.*yan.*'])) from system.numbers limit 10;
@ -181,6 +186,7 @@ SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), materialize(['/odezhda-
SELECT 1 = multiMatchAny(materialize('фабрикант'), materialize(['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']));
-- All indices tests
SELECT [] = multiMatchAllIndices(materialize('Butterbrot!'), materialize([]::Array(String))) from system.numbers limit 5;
SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', '.*yan.*']))) from system.numbers limit 5;
SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', 'neverexisted', '.*yan.*']))) from system.numbers limit 5;
SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['neverexisted', 'anotherone', 'andanotherone'])) from system.numbers limit 5;