Allow non-const pattern array argument in multi[Fuzzy]Match*()

Resolves #38046
This commit is contained in:
Robert Schulze 2022-06-26 16:45:16 +00:00
parent 7c812df47e
commit d547aa7849
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
17 changed files with 1156 additions and 55 deletions

View File

@ -46,7 +46,7 @@ public:
size_t getNumberOfArguments() const override { return 3; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -70,7 +70,8 @@ public:
const ColumnPtr & arr_ptr = arguments[2].column;
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
assert(col_haystack_vector); // getReturnTypeImpl() checks the data type
const ColumnConst * col_haystack_const = typeid_cast<const ColumnConst *>(&*column_haystack);
assert(static_cast<bool>(col_haystack_vector) ^ static_cast<bool>(col_haystack_const));
UInt32 edit_distance = 0;
if (const auto * col_const_uint8 = checkAndGetColumnConst<ColumnUInt8>(num_ptr.get()))
@ -82,9 +83,12 @@ public:
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The number is not const or does not fit in UInt32", arguments[1].column->getName());
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
if (!col_const_arr)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The array is not const", arguments[2].column->getName());
const ColumnArray * col_needles = checkAndGetColumn<ColumnArray>(arr_ptr.get());
const ColumnConst * col_needles_const = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
assert(static_cast<bool>(col_needles) ^ static_cast<bool>(col_needles_const));
if (col_haystack_const && col_needles)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support search with non-constant needles in constant haystack", name);
using ResultType = typename Impl::ResultType;
auto col_res = ColumnVector<ResultType>::create();
@ -94,10 +98,19 @@ public:
auto & offsets_res = col_offsets->getData();
// the implementations are responsible for resizing the output column
Array needles_arr = col_const_arr->getValue<Array>();
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res, edit_distance,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
if (col_needles_const)
{
Array needles_arr = col_needles_const->getValue<Array>();
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res, edit_distance,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
else
{
Impl::vectorVector(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), *col_needles, vec_res, offsets_res, edit_distance,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
if constexpr (Impl::is_column_array)
return ColumnArray::create(std::move(col_res), std::move(col_offsets));

View File

@ -61,7 +61,7 @@ public:
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -81,11 +81,15 @@ public:
const ColumnPtr & arr_ptr = arguments[1].column;
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
assert(col_haystack_vector); // getReturnTypeImpl() checks the data type
const ColumnConst * col_haystack_const = typeid_cast<const ColumnConst *>(&*column_haystack);
assert(static_cast<bool>(col_haystack_vector) ^ static_cast<bool>(col_haystack_const));
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
if (!col_const_arr)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The array is not const", arguments[1].column->getName());
const ColumnArray * col_needles = checkAndGetColumn<ColumnArray>(arr_ptr.get());
const ColumnConst * col_needles_const = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
assert(static_cast<bool>(col_needles) ^ static_cast<bool>(col_needles_const));
if (col_haystack_const && col_needles)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support search with non-constant needles in constant haystack", name);
using ResultType = typename Impl::ResultType;
auto col_res = ColumnVector<ResultType>::create();
@ -95,10 +99,20 @@ public:
auto & offsets_res = col_offsets->getData();
// the implementations are responsible for resizing the output column
Array needles_arr = col_const_arr->getValue<Array>();
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
if (col_needles_const)
{
Array needles_arr = col_needles_const->getValue<Array>();
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
else
{
Impl::vectorVector(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), *col_needles, vec_res, offsets_res,
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
if constexpr (Impl::is_column_array)
return ColumnArray::create(std::move(col_res), std::move(col_offsets));

View File

@ -79,6 +79,7 @@ struct MultiMatchAllIndicesImpl
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
offsets.resize(haystack_offsets.size());
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
@ -102,10 +103,10 @@ struct MultiMatchAllIndicesImpl
for (size_t i = 0; i < haystack_offsets_size; ++i)
{
UInt64 length = haystack_offsets[i] - offset - 1;
/// Hyperscan restriction.
// vectorscan restriction.
if (length > std::numeric_limits<UInt32>::max())
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
/// Scan, check, update the offsets array and the offset of haystack.
// scan, check, update the offsets array and the offset of haystack.
err = hs_scan(
hyperscan_regex->getDB(),
reinterpret_cast<const char *>(haystack_data.data()) + offset,
@ -123,6 +124,97 @@ struct MultiMatchAllIndicesImpl
throw Exception(
"multi-search all indices is not implemented when vectorscan is off",
ErrorCodes::NOT_IMPLEMENTED);
#endif // USE_VECTORSCAN
}
/// Overload for the variant without an edit distance argument.
/// Forwards every argument unchanged to the overload that takes an edit distance,
/// passing an empty optional in its place.
static void vectorVector(
    const ColumnString::Chars & haystack_data,
    const ColumnString::Offsets & haystack_offsets,
    const ColumnArray & needles_col,
    PaddedPODArray<ResultType> & res,
    PaddedPODArray<UInt64> & offsets,
    bool allow_hyperscan,
    size_t max_hyperscan_regexp_length,
    size_t max_hyperscan_regexp_total_length)
{
    vectorVector(
        haystack_data,
        haystack_offsets,
        needles_col,
        res,
        offsets,
        /*edit_distance=*/ std::nullopt,
        allow_hyperscan,
        max_hyperscan_regexp_length,
        max_hyperscan_regexp_total_length);
}
/// Variant for a non-constant needles column: haystack row i is scanned against the
/// pattern array stored in row i of `needles_col`.
///
/// For every row, each id reported by the match callback is appended to `res`, and
/// `offsets[i]` is set to the size of `res` after that row has been scanned.
///
/// Throws:
///  - FUNCTION_NOT_ALLOWED if `allow_hyperscan` is false;
///  - NOT_IMPLEMENTED if built without vectorscan;
///  - CANNOT_ALLOCATE_MEMORY if the scratch space cannot be cloned;
///  - TOO_MANY_BYTES if a haystack row is longer than UInt32 max;
///  - HYPERSCAN_CANNOT_SCAN_TEXT if the scan itself fails.
/// checkHyperscanRegexp() is also called for every row with the two length limits.
static void vectorVector(
    [[maybe_unused]] const ColumnString::Chars & haystack_data,
    [[maybe_unused]] const ColumnString::Offsets & haystack_offsets,
    [[maybe_unused]] const ColumnArray & needles_col,
    [[maybe_unused]] PaddedPODArray<ResultType> & res,
    [[maybe_unused]] PaddedPODArray<UInt64> & offsets,
    [[maybe_unused]] std::optional<UInt32> edit_distance,
    bool allow_hyperscan,
    [[maybe_unused]] size_t max_hyperscan_regexp_length,
    [[maybe_unused]] size_t max_hyperscan_regexp_total_length)
{
    if (!allow_hyperscan)
        throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
#if USE_VECTORSCAN
    /// One entry per haystack row. Sized once up front instead of on every iteration,
    /// which also covers the case of zero rows.
    offsets.resize(haystack_offsets.size());

    /// The callback does not depend on the row, so it is defined once.
    /// `context` is the result array; every reported id is appended and the scan continues.
    auto on_match = [](unsigned int id,
                       unsigned long long /* from */, // NOLINT
                       unsigned long long /* to */, // NOLINT
                       unsigned int /* flags */,
                       void * context) -> int
    {
        static_cast<PaddedPODArray<ResultType>*>(context)->push_back(id);
        return 0;
    };

    size_t prev_haystack_offset = 0;
    for (size_t i = 0; i < haystack_offsets.size(); ++i)
    {
        /// Extract the pattern array of the current row.
        Field field;
        needles_col.get(i, field);
        Array & needles_arr = DB::get<Array &>(field);

        /// The views point into the strings held by `field`, which stays alive for the whole iteration.
        std::vector<std::string_view> needles;
        needles.reserve(needles_arr.size());
        for (const auto & needle : needles_arr)
            needles.emplace_back(needle.get<String>());

        checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);

        const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);

        hs_scratch_t * scratch = nullptr;
        hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
        if (err != HS_SUCCESS)
            throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);

        /// Takes ownership of the cloned scratch space.
        MultiRegexps::ScratchPtr smart_scratch(scratch);

        /// NOTE(review): the "- 1" presumably excludes a terminating zero byte stored after each string — confirm.
        const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;

        // vectorscan restriction.
        if (cur_haystack_length > std::numeric_limits<UInt32>::max())
            throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);

        /// Scan, check, update the offsets array and the offset of haystack.
        err = hs_scan(
            hyperscan_regex->getDB(),
            reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
            cur_haystack_length,
            0,
            smart_scratch.get(),
            on_match,
            &res);
        if (err != HS_SUCCESS)
            throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);

        offsets[i] = res.size();
        prev_haystack_offset = haystack_offsets[i];
    }
#else
    throw Exception(
        "multi-search all indices is not implemented when vectorscan is off",
        ErrorCodes::NOT_IMPLEMENTED);
#endif // USE_VECTORSCAN
}
};

View File

@ -75,7 +75,7 @@ struct MultiMatchAnyImpl
const ColumnString::Offsets & haystack_offsets,
const Array & needles_arr,
PaddedPODArray<ResultType> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
PaddedPODArray<UInt64> & /*offsets*/,
[[maybe_unused]] std::optional<UInt32> edit_distance,
bool allow_hyperscan,
size_t max_hyperscan_regexp_length,
@ -93,7 +93,7 @@ struct MultiMatchAnyImpl
res.resize(haystack_offsets.size());
#if USE_VECTORSCAN
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex, WithEditDistance>(needles, edit_distance);
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
@ -120,10 +120,10 @@ struct MultiMatchAnyImpl
for (size_t i = 0; i < haystack_offsets_size; ++i)
{
UInt64 length = haystack_offsets[i] - offset - 1;
/// Vectorscan restriction.
// vectorscan restriction.
if (length > std::numeric_limits<UInt32>::max())
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
/// Zero the result, scan, check, update the offset.
// zero the result, scan, check, update the offset.
res[i] = 0;
err = hs_scan(
hyperscan_regex->getDB(),
@ -157,6 +157,141 @@ struct MultiMatchAnyImpl
res[i] = j + 1;
}
}
#endif // USE_VECTORSCAN
}
/// Overload for the variant without an edit distance argument.
/// Forwards every argument unchanged to the overload that takes an edit distance,
/// passing an empty optional in its place.
static void vectorVector(
    const ColumnString::Chars & haystack_data,
    const ColumnString::Offsets & haystack_offsets,
    const ColumnArray & needles_col,
    PaddedPODArray<ResultType> & res,
    PaddedPODArray<UInt64> & offsets,
    bool allow_hyperscan,
    size_t max_hyperscan_regexp_length,
    size_t max_hyperscan_regexp_total_length)
{
    vectorVector(
        haystack_data,
        haystack_offsets,
        needles_col,
        res,
        offsets,
        /*edit_distance=*/ std::nullopt,
        allow_hyperscan,
        max_hyperscan_regexp_length,
        max_hyperscan_regexp_total_length);
}
/// Variant for a non-constant needles column: haystack row i is matched against the
/// pattern array stored in row i of `needles_col`, and the outcome is written to `res[i]`.
///
/// `res` is resized to the number of haystack rows. The offsets argument is not used.
/// With vectorscan, `res[i]` is 0 when nothing matched; otherwise it is the id reported by the
/// first callback (FindAnyIndex) or 1 (FindAny).
/// Without vectorscan, every needle is matched through RE2 instead; edit distance is not
/// supported in that build and throws NOT_IMPLEMENTED.
///
/// Throws FUNCTION_NOT_ALLOWED if `allow_hyperscan` is false.
static void vectorVector(
    [[maybe_unused]] const ColumnString::Chars & haystack_data,
    [[maybe_unused]] const ColumnString::Offsets & haystack_offsets,
    [[maybe_unused]] const ColumnArray & needles_col,
    [[maybe_unused]] PaddedPODArray<ResultType> & res,
    PaddedPODArray<UInt64> & /*offsets*/,
    [[maybe_unused]] std::optional<UInt32> edit_distance,
    bool allow_hyperscan,
    [[maybe_unused]] size_t max_hyperscan_regexp_length,
    [[maybe_unused]] size_t max_hyperscan_regexp_total_length)
{
    if (!allow_hyperscan)
        throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");

    res.resize(haystack_offsets.size());
#if USE_VECTORSCAN
    /// `context` points at the result cell of the row being scanned.
    auto store_first_match = []([[maybe_unused]] unsigned int id,
                                unsigned long long /* from */, // NOLINT
                                unsigned long long /* to */, // NOLINT
                                unsigned int /* flags */,
                                void * context) -> int
    {
        if constexpr (FindAnyIndex)
            *reinterpret_cast<ResultType *>(context) = id;
        else if constexpr (FindAny)
            *reinterpret_cast<ResultType *>(context) = 1;
        /// Once we hit the callback, there is no need to search for others.
        return 1;
    };

    size_t row_begin = 0;
    for (size_t row = 0; row < haystack_offsets.size(); ++row)
    {
        /// Pattern array of the current row.
        Field row_field;
        needles_col.get(row, row_field);
        Array & row_needles = DB::get<Array &>(row_field);

        /// The views point into the strings held by `row_field`, alive until the end of the iteration.
        std::vector<std::string_view> patterns;
        patterns.reserve(row_needles.size());
        for (const auto & pattern : row_needles)
            patterns.emplace_back(pattern.get<String>());

        checkHyperscanRegexp(patterns, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);

        const auto & compiled = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(patterns, edit_distance);

        hs_scratch_t * raw_scratch = nullptr;
        hs_error_t status = hs_clone_scratch(compiled->getScratch(), &raw_scratch);
        if (status != HS_SUCCESS)
            throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);

        /// Takes ownership of the cloned scratch space.
        MultiRegexps::ScratchPtr scratch_guard(raw_scratch);

        /// NOTE(review): the "- 1" presumably excludes a terminating zero byte stored after each string — confirm.
        const size_t row_length = haystack_offsets[row] - row_begin - 1;

        // vectorscan restriction.
        if (row_length > std::numeric_limits<UInt32>::max())
            throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);

        // zero the result, scan, check, update the offset.
        res[row] = 0;
        status = hs_scan(
            compiled->getDB(),
            reinterpret_cast<const char *>(haystack_data.data()) + row_begin,
            row_length,
            0,
            scratch_guard.get(),
            store_first_match,
            &res[row]);

        /// The callback stops the scan after the first hit, so a terminated scan is not an error.
        const bool scan_ok = (status == HS_SUCCESS) || (status == HS_SCAN_TERMINATED);
        if (!scan_ok)
            throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);

        row_begin = haystack_offsets[row];
    }
#else
    // fallback if vectorscan is not compiled
    if constexpr (WithEditDistance)
        throw Exception(
            "Edit distance multi-search is not implemented when vectorscan is off",
            ErrorCodes::NOT_IMPLEMENTED);

    /// Start from "no match" for every row.
    memset(res.data(), 0, res.size() * sizeof(res.front()));

    size_t row_begin = 0;
    for (size_t row = 0; row < haystack_offsets.size(); ++row)
    {
        const auto * const row_data = &haystack_data[row_begin];
        const size_t row_length = haystack_offsets[row] - row_begin - 1;

        /// Pattern array of the current row.
        Field row_field;
        needles_col.get(row, row_field);
        Array & row_needles = DB::get<Array &>(row_field);

        std::vector<std::string_view> patterns;
        patterns.reserve(row_needles.size());
        for (const auto & pattern : row_needles)
            patterns.emplace_back(pattern.get<String>());

        checkHyperscanRegexp(patterns, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);

        for (size_t needle_idx = 0; needle_idx < row_needles.size(); ++needle_idx)
        {
            String needle = row_needles[needle_idx].safeGet<String>();
            const auto & regexp = Regexps::Regexp(Regexps::createRegexp</*like*/ false, /*no_capture*/ true, /*case_insensitive*/ false>(needle));
            const bool match = regexp.getRE2()->Match(
                {reinterpret_cast<const char *>(row_data), row_length},
                0,
                row_length,
                re2_st::RE2::UNANCHORED,
                nullptr,
                0);

            /// FindAny accumulates a flag; FindAnyIndex stores the 1-based position of the matching needle
            /// (a later matching needle overwrites an earlier one).
            if constexpr (FindAny)
                res[row] |= match;
            else if (FindAnyIndex && match)
                res[row] = needle_idx + 1;
        }

        row_begin = haystack_offsets[row];
    }
#endif // USE_VECTORSCAN
}
};

View File

@ -10,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -66,6 +67,12 @@ struct MultiSearchFirstIndexImpl
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
/// Placeholder for the non-constant needles code path: accepts any argument list,
/// ignores it and always throws ILLEGAL_COLUMN.
/// NOTE(review): presumably exists so that generic callers can reference Impl::vectorVector — confirm.
template <typename... Args>
static void vectorVector(Args &&... /*unused*/)
{
    throw Exception(
        ErrorCodes::ILLEGAL_COLUMN,
        "Function '{}' doesn't support non-constant needles",
        name);
}
};
}

View File

@ -10,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -75,6 +76,12 @@ struct MultiSearchFirstPositionImpl
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
/// Placeholder for the non-constant needles code path: accepts any argument list,
/// ignores it and always throws ILLEGAL_COLUMN.
/// NOTE(review): presumably exists so that generic callers can reference Impl::vectorVector — confirm.
template <typename... Args>
static void vectorVector(Args &&... /*unused*/)
{
    throw Exception(
        ErrorCodes::ILLEGAL_COLUMN,
        "Function '{}' doesn't support non-constant needles",
        name);
}
};
}

View File

@ -10,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -65,6 +66,12 @@ struct MultiSearchImpl
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
/// Placeholder for the non-constant needles code path: accepts any argument list,
/// ignores it and always throws ILLEGAL_COLUMN.
/// NOTE(review): presumably exists so that generic callers can reference Impl::vectorVector — confirm.
template <typename... Args>
static void vectorVector(Args &&... /*unused*/)
{
    throw Exception(
        ErrorCodes::ILLEGAL_COLUMN,
        "Function '{}' doesn't support non-constant needles",
        name);
}
};
}

View File

@ -1,3 +1,4 @@
-- With const pattern
1
1
1
@ -623,3 +624,624 @@ All tests above must return 1, all tests below return something.
[1,3]
[]
[1,2,3]
-- With non-const pattern
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1

View File

@ -1,5 +1,7 @@
-- Tags: no-fasttest, use-vectorscan
SELECT '-- With const pattern';
select 0 = multiMatchAny(materialize('mpnsguhwsitzvuleiwebwjfitmsg'), ['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin', 'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan', 'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc', 'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu', 'ytdixvasrorhripzfhjdmlhqksmctyycwp']) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('qjjzqexjpgkglgxpzrbqbnskq'), ['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq', 'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv', 'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg', 'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh', 'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap']) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('fdkmtqmxnegwvnjhghjq'), ['vynkybvdmhgeezybbdqfrukibisj', 'knazzamgjjpavwhvdkwigykh', 'peumnifrmdhhmrqqnemw', 'lmsnyvqoisinlaqobxojlwfbi', 'oqwfzs', 'dymudxxeodwjpgbibnkvr', 'vomtfsnizkplgzktqyoiw', 'yoyfuhlpgrzds', 'cefao', 'gi', 'srpgxfjwl', 'etsjusdeiwbfe', 'ikvtzdopxo', 'ljfkavrau', 'soqdhxtenfrkmeic', 'ktprjwfcelzbup', 'pcvuoddqwsaurcqdtjfnczekwni', 'agkqkqxkfbkfgyqliahsljim']) from system.numbers limit 10;
@ -92,3 +94,96 @@ SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda
SELECT [] = multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']);
SELECT 'All tests above must return 1, all tests below return something.';
SELECT arraySort(multiMatchAllIndices(arrayJoin(['aaaa', 'aaaaaa', 'bbbb', 'aaaaaaaaaaaaaa']), ['.*aa.*aaa.*', 'aaaaaa{2}', '(aa){3}']));
SELECT '-- With non-const pattern';
select 0 = multiMatchAny(materialize('mpnsguhwsitzvuleiwebwjfitmsg'), materialize(['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin', 'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan', 'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc', 'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu', 'ytdixvasrorhripzfhjdmlhqksmctyycwp'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('qjjzqexjpgkglgxpzrbqbnskq'), materialize(['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq', 'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv', 'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg', 'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh', 'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('fdkmtqmxnegwvnjhghjq'), materialize(['vynkybvdmhgeezybbdqfrukibisj', 'knazzamgjjpavwhvdkwigykh', 'peumnifrmdhhmrqqnemw', 'lmsnyvqoisinlaqobxojlwfbi', 'oqwfzs', 'dymudxxeodwjpgbibnkvr', 'vomtfsnizkplgzktqyoiw', 'yoyfuhlpgrzds', 'cefao', 'gi', 'srpgxfjwl', 'etsjusdeiwbfe', 'ikvtzdopxo', 'ljfkavrau', 'soqdhxtenfrkmeic', 'ktprjwfcelzbup', 'pcvuoddqwsaurcqdtjfnczekwni', 'agkqkqxkfbkfgyqliahsljim'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('khljxzxlpcrxpkrfybbfk'), materialize(['', 'lpc', 'rxpkrfybb', 'crxp', '', 'pkr', 'jxzxlpcrxpkrf', '', 'xzxlpcr', 'xpk', 'fyb', 'xzxlpcrxpkrfybbfk', 'k', 'lpcrxp', 'ljxzxlpcr', 'r', 'pkr', 'fk'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('rbrizgjbigvzfnpgmpkqxoqxvdj'), materialize(['ee', 'cohqnb', 'msol', 'yhlujcvhklnhuomy', 'ietn', 'vgmnlkcsybtokrepzrm', 'wspiryefojxysgrzsxyrluykxfnnbzdstcel', 'mxisnsivndbefqxwznimwgazuulupbaihavg', 'vpzdjvqqeizascxmzdhuq', 'pgvncohlxcqjhfkm', 'mbaypcnfapltsegquurahlsruqvipfhrhq', 'ioxjbcyyqujfveujfhnfdfokfcrlsincjbdt', 'cnvlujyowompdrqjwjx', 'wobwed', 'kdfhaoxiuifotmptcmdbk', 'leoamsnorcvtlmokdomkzuo', 'jjw', 'ogugysetxuqmvggneosbsfbonszepsatq'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('uymwxzyjbfegbhgswiqhinf'), materialize(['lizxzbzlwljkr', 'ukxygktlpzuyijcqeqktxenlaqi', 'onperabgbdiafsxwbvpjtyt', 'xfqgoqvhqph', 'aflmcwabtwgmajmmqelxwkaolyyhmdlc', 'yfz', 'meffuiaicvwed', 'hhzvgmifzamgftkifaeowayjrnnzw', 'nwewybtajv', 'ectiye', 'epjeiljegmqqjncubj', 'zsjgftqjrn', 'pssng', 'raqoarfhdoeujulvqmdo'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('omgghgnzjmecpzqmtcvw'), materialize(['fjhlzbszodmzavzg', 'gfofrnwrxprkfiokv', 'jmjiiqpgznlmyrxwewzqzbe', 'pkyrsqkltlmxr', 'crqgkgqkkyujcyoc', 'endagbcxwqhueczuasykmajfsvtcmh', 'xytmxtrnkdysuwltqomehddp', 'etmdxyyfotfyifwvbykghijvwv', 'mwqtgrncyhkfhjdg', 'iuvymofrqpp', 'pgllsdanlhzqhkstwsmzzftp', 'disjylcceufxtjdvhy'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('mznihnmshftvnmmhnrulizzpslq'), materialize(['nrul', 'mshftvnmmhnr', 'z', 'mhnrulizzps', 'hftvnmmhnrul', 'ihnmshftvnmmhnrulizzp', 'izz', '', 'uli', 'nihnmshftvnmmhnru', 'hnrulizzp', 'nrulizz'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('ruqmqrsxrbftvruvahonradau'), materialize(['uqmqrsxrbft', 'ftv', 'tvruvahonrad', 'mqrsxrbftvruvahon', 'rbftvruvah', 'qrsxrbftvru', 'o', 'ahonradau', 'a', 'ft', '', 'u', 'rsxrbftvruvahonradau', 'ruvahon', 'bftvruvahonradau', 'qrsxrbftvru', 't', 'vahonrada', 'vruvahonradau', 'onra'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('gpsevxtcoeexrltyzduyidmtzxf'), materialize(['exrltyzduyid', 'vxtcoeexrltyz', 'xr', 'ltyzduyidmt', 'yzduy', 'exr', 'coeexrltyzduy', 'coeexrltyzduy', 'rlty', 'rltyzduyidm', 'exrltyz', 'xtcoeexrlty', 'vxtcoeexrltyzduyidm', '', 'coeexrl', 'sevxtcoeexrltyzdu', 'dmt', ''])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('dyhycfhzyewaikgursyxfkuv'), materialize(['sktnofpugrmyxmbizzrivmhn', 'fhlgadpoqcvktbfzncxbllvwutdawmw', 'eewzjpcgzrqmltbgmhafwlwqb', 'tpogbkyj', 'rtllntxjgkzs', 'mirbvsqexscnzglogigbujgdwjvcv', 'iktwpgjsakemewmahgqza', 'xgfvzkvqgiuoihjjnxwwpznxhz', 'nxaumpaknreklbwynvxdsmatjekdlxvklh', 'zadzwqhgfxqllihuudozxeixyokhny', 'tdqpgfpzexlkslodps', 'slztannufxaabqfcjyfquafgfhfb', 'xvjldhfuwurvkb', 'aecv', 'uycfsughpikqsbcmwvqygdyexkcykhbnau', 'jr'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('vbcsettndwuntnruiyclvvwoo'), materialize(['dwuntnru', '', 'ttndwuntnruiyclvv', 'ntnr', 'nruiyclvvw', 'wo', '', 'bcsettndwuntnruiycl', 'yc', 'untnruiyclvvw', 'csettndwuntnr', 'ntnruiyclvvwo'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('pqqnugshlczcuxhpjxjbcnro'), materialize(['dpeedqy', 'rtsc', 'jdgla', 'qkgudqjiyzvlvsj', 'xmfxawhijgxxtydbd', 'ebgzazqthb', 'wyrjhvhwzhmpybnylirrn', 'iviqbyuclayqketooztwegtkgwnsezfl', 'bhvidy', 'hijctxxweboq', 't', 'osnzfbziidteiaifgaanm'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('loqchlxspwuvvccucskuytr'), materialize(['', 'k', 'qchlxspwu', 'u', 'hlxspwuvv', 'wuvvccucsku', 'vcc', 'uyt', 'uvv', 'spwu', 'ytr', 'wuvvccucs', 'xspwuv', 'lxspwuvvccuc', 'spwuvvccu', 'oqchlxspwuvvccucskuy'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('pjjyzupzwllshlnatiujmwvaofr'), materialize(['lnatiujmwvao', '', 'zupzwllsh', 'nati', 'wllshl', 'hlnatiujmwv', 'mwvao', 'shlnat', 'ati', 'wllshlnatiujmwvao', 'wllshlnatiujmwvaofr', 'nat'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('iketunkleyaqaxdlocci'), materialize(['nkleyaqaxd', 'etunkleyaq', 'yaqaxdlocci', 'tunkleyaq', 'eyaqaxdlocc', 'leyaq', 'nkleyaqaxdl', 'tunkleya', 'kleyaqa', 'etunkleya', 'leyaqa', 'dlo', 'yaqa', 'leyaqaxd', 'etunkleyaq', ''])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('drqianqtangmgbdwruvblkqd'), materialize(['wusajejyucamkyl', 'wsgibljugzrpkniliy', 'lhwqqiuafwffyersqjgjvvvfurx', 'jfokpzzxfdonelorqu', 'ccwkpcgac', 'jmyulqpndkmzbfztobwtm', 'rwrgfkccgxht', 'ggldjecrgbngkonphtcxrkcviujihidjx', 'spwweavbiokizv', 'lv', 'krb', 'vstnhvkbwlqbconaxgbfobqky', 'pvxwdc', 'thrl', 'ahsblffdveamceonqwrbeyxzccmux', 'yozji', 'oejtaxwmeovtqtz', 'zsnzznvqpxdvdxhznxrjn', 'hse', 'kcmkrccxmljzizracxwmpoaggywhdfpxkq'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('yasnpckniistxcejowfijjsvkdajz'), materialize(['slkpxhtsmrtvtm', 'crsbq', 'rdeshtxbfrlfwpsqojassxmvlfbzefldavmgme', 'ipetilcbpsfroefkjirquciwtxhrimbmwnlyv', 'knjpwkmdwbvdbapuyqbtsw', 'horueidziztxovqhsicnklmharuxhtgrsr', 'ofohrgpz', 'oneqnwyevbaqsonrcpmxcynflojmsnix', 'shg', 'nglqzczevgevwawdfperpeytuodjlf'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('ueptpscfgxhplwsueckkxs'), materialize(['ohhygchclbpcdwmftperprn', 'dvpjdqmqckekndvcerqrpkxen', 'lohhvarnmyi', 'zppd', 'qmqxgfewitsunbuhffozcpjtc', 'hsjbioisycsrawktqssjovkmltxodjgv', 'dbzuunwbkrtosyvctdujqtvaawfnvuq', 'gupbvpqthqxae', 'abjdmijaaiasnccgxttmqdsz', 'uccyumqoyqe', 'kxxliepyzlc', 'wbqcqtbyyjbqcgdbpkmzugksmcxhvr', 'piedxm', 'uncpphzoif', 'exkdankwck', 'qeitzozdrqopsergzr', 'hesgrhaftgesnzflrrtjdobxhbepjoas', 'wfpexx'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('ldrzgttlqaphekkkdukgngl'), materialize(['gttlqaphekkkdukgn', 'ekkkd', 'gttlqaphe', 'qaphek', 'h', 'kdu', 'he', 'phek', '', 'drzgttlqaphekkkd'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('ololo'), materialize(['ololo', 'ololo', 'ololo']));
SELECT 1 = multiMatchAny(materialize('khljxzxlpcrxpkrfybbfk'), materialize(['k']));
select 1 = multiMatchAny(materialize(''), materialize(['']));
select 0 = multiMatchAny(materialize(''), materialize(['some string']));
select 1 = multiMatchAny(materialize('abc'), materialize(['']));
select 1 = multiMatchAny(materialize('abc'), materialize([''])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['defgh']));
select 0 = multiMatchAny(materialize('abc'), materialize(['defg']));
select 0 = multiMatchAny(materialize('abc'), materialize(['def']));
select 0 = multiMatchAny(materialize('abc'), materialize(['de']));
select 0 = multiMatchAny(materialize('abc'), materialize(['d']));
select 1 = multiMatchAny(materialize('abc'), materialize(['b'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['bc'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['bcde'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['bcdef'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['bcdefg'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['bcdefgh'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['abcdefg'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['abcdef'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['abcde'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['abcd'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['abc'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['ab'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['a'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abcd'), materialize(['c'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abcd'), materialize(['cd'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abcd'), materialize(['cde'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abcd'), materialize(['cdef'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abcd'), materialize(['cdefg'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abcd'), materialize(['cdefgh'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['defgh'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['defg'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['def'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['de'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abc'), materialize(['d'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['...'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('a\nbc'), materialize(['a?bc'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('a\nbc'), materialize(['a.bc'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('a\0bc'), materialize(['a?bc'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('a\0bc'), materialize(['a.bc'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abcdef'), materialize(['a.....'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('abcdef'), materialize(['a......'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abcdef'), materialize(['a......', 'a.....'])) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('aaaa'), materialize(['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}'])) from system.numbers limit 10;
select 1 = multiMatchAny(materialize('abc'), materialize(['a\0d'])) from system.numbers limit 10;
select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['google', 'unian1'])) from system.numbers limit 10;
select 2 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['google1', 'unian'])) from system.numbers limit 10;
select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', '.*yan.*'])) from system.numbers limit 10;
select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), materialize(['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me'])) from system.numbers limit 10;
SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), materialize(['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']));
SELECT 1 = multiMatchAny(materialize('фабрикант'), materialize(['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']));
-- All indices tests
SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', '.*yan.*']))) from system.numbers limit 5;
SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['.*goo.*', 'neverexisted', '.*yan.*']))) from system.numbers limit 5;
SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), materialize(['neverexisted', 'anotherone', 'andanotherone'])) from system.numbers limit 5;
SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices(materialize('фабрикант'), materialize(['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])));
SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), materialize(['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']));
SELECT [] = multiMatchAllIndices(materialize('aaaa'), materialize(['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']));

View File

@ -6,4 +6,9 @@
1
1
1
1
0
1
1
1
0

View File

@ -1,9 +1,16 @@
-- Tags: no-debug
-- With hyperscan enabled, multiMatchAny is run once with a constant and once with a materialized pattern array.
SET allow_hyperscan = 1;
SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']);
SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), materialize(['hel+o', 'w(or)*ld']));
-- With hyperscan disabled, every multiMatch* call below is expected to fail with server error 446,
-- for constant and materialized pattern arrays alike.
SET allow_hyperscan = 0;
SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); -- { serverError 446 }
SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), materialize(['hel+o', 'w(or)*ld'])); -- { serverError 446 }
SELECT multiMatchAllIndices(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); -- { serverError 446 }
SELECT multiMatchAllIndices(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), materialize(['hel+o', 'w(or)*ld'])); -- { serverError 446 }
-- multiSearchAny carries no error hint: it is expected to still succeed while allow_hyperscan = 0.
SELECT multiSearchAny(arrayJoin(['hello', 'world', 'hello, world', 'abc']), ['hello', 'world']);

View File

@ -1,3 +1,7 @@
1
1
2
2
[2]
[2]
[1,8]

View File

@ -1,3 +1,12 @@
-- Tags: no-fasttest
-- Constant haystack with a constant pattern array, once for each multiMatch* / multiFuzzyMatch* function
-- (the fuzzy variants take 1 as their second argument, the edit distance), followed by multiSearchAllPositions.
SELECT multiMatchAny('goodbye', ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiFuzzyMatchAny('goodbye', 1, ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiMatchAnyIndex('goodbye', ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiFuzzyMatchAnyIndex('goodbye', 1, ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiMatchAllIndices('goodbye', ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiFuzzyMatchAllIndices('goodbye', 1, ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']);
SELECT multiSearchAllPositions('hello, world', ['hello', 'world']);

View File

@ -1,3 +1,7 @@
- const pattern
0
0
0
1
1
1
@ -9,6 +13,22 @@
1
1
1
0
1
1
1
1
0
1
2
2
1
[2,3,4]
[]
- non-const pattern
0
0
0
1
1
1
@ -18,17 +38,15 @@
1
1
1
0
1
1
1
1
0
1
2
2
1
1
1
1
1
1
1
1
1
[2,3,4]
[]

View File

@ -2,30 +2,60 @@
SET send_logs_level = 'fatal';
select 0 = multiFuzzyMatchAny('abc', 0, ['a1c']) from system.numbers limit 5;
select 1 = multiFuzzyMatchAny('abc', 1, ['a1c']) from system.numbers limit 5;
select 1 = multiFuzzyMatchAny('abc', 2, ['a1c']) from system.numbers limit 5;
select 1 = multiFuzzyMatchAny('abc', 3, ['a1c']) from system.numbers limit 5; -- { serverError 36 }
select 1 = multiFuzzyMatchAny('abc', 4, ['a1c']) from system.numbers limit 5; -- { serverError 36 }
-- Section marker echoed into the test output; the statements that follow pass the pattern array as a constant.
SELECT '- const pattern';
select 1 = multiFuzzyMatchAny('leftabcright', 1, ['a1c']) from system.numbers limit 5;
-- run queries multiple times to test the pattern caching
select multiFuzzyMatchAny('abc', 0, ['a1c']) from system.numbers limit 3;
select multiFuzzyMatchAny('abc', 1, ['a1c']) from system.numbers limit 3;
select multiFuzzyMatchAny('abc', 2, ['a1c']) from system.numbers limit 3;
select multiFuzzyMatchAny('abc', 3, ['a1c']) from system.numbers limit 3; -- { serverError 36 }
select multiFuzzyMatchAny('abc', 4, ['a1c']) from system.numbers limit 3; -- { serverError 36 }
select 1 = multiFuzzyMatchAny('hello some world', 0, ['^hello.*world$']);
select 1 = multiFuzzyMatchAny('hallo some world', 1, ['^hello.*world$']);
select 0 = multiFuzzyMatchAny('halo some wrld', 2, ['^hello.*world$']);
select 1 = multiFuzzyMatchAny('halo some wrld', 2, ['^hello.*world$', '^halo.*world$']);
select 1 = multiFuzzyMatchAny('halo some wrld', 2, ['^halo.*world$', '^hello.*world$']);
select 1 = multiFuzzyMatchAny('halo some wrld', 3, ['^hello.*world$']);
select 1 = multiFuzzyMatchAny('hello some world', 10, ['^hello.*world$']); -- { serverError 36 }
select 1 = multiFuzzyMatchAny('hello some world', -1, ['^hello.*world$']); -- { serverError 43 }
select 1 = multiFuzzyMatchAny('hello some world', 10000000000, ['^hello.*world$']); -- { serverError 44 }
select 1 = multiFuzzyMatchAny('http://hyperscan_is_nice.ru/st', 2, ['http://hyperscan_is_nice.ru/(st\\d\\d$|st\\d\\d\\.|st1[0-4]\\d|st150|st\\d$|gl|rz|ch)']);
select 0 = multiFuzzyMatchAny('string', 0, ['zorro$', '^tring', 'in$', 'how.*', 'it{2}', 'works']);
select multiFuzzyMatchAny('leftabcright', 1, ['a1c']) from system.numbers limit 3;
select 1 = multiFuzzyMatchAny('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']);
select 2 = multiFuzzyMatchAnyIndex('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']);
select 2 = multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^hello.*world$', '^halo.*world$']);
select 1 = multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^halo.*world$', '^hello.*world$']);
select multiFuzzyMatchAny('hello some world', 0, ['^hello.*world$']);
select multiFuzzyMatchAny('hallo some world', 1, ['^hello.*world$']);
select multiFuzzyMatchAny('halo some wrld', 2, ['^hello.*world$']);
select multiFuzzyMatchAny('halo some wrld', 2, ['^hello.*world$', '^halo.*world$']);
select multiFuzzyMatchAny('halo some wrld', 2, ['^halo.*world$', '^hello.*world$']);
select multiFuzzyMatchAny('halo some wrld', 3, ['^hello.*world$']);
-- Second-argument values 10, -1 and 10000000000 are expected to fail with server errors 36, 43 and 44 respectively.
select multiFuzzyMatchAny('hello some world', 10, ['^hello.*world$']); -- { serverError 36 }
select multiFuzzyMatchAny('hello some world', -1, ['^hello.*world$']); -- { serverError 43 }
select multiFuzzyMatchAny('hello some world', 10000000000, ['^hello.*world$']); -- { serverError 44 }
select multiFuzzyMatchAny('http://hyperscan_is_nice.de/st', 2, ['http://hyperscan_is_nice.de/(st\\d\\d$|st\\d\\d\\.|st1[0-4]\\d|st150|st\\d$|gl|rz|ch)']);
select multiFuzzyMatchAny('string', 0, ['zorro$', '^tring', 'in$', 'how.*', 'it{2}', 'works']);
select multiFuzzyMatchAny('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']);
select multiFuzzyMatchAnyIndex('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']);
select multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^hello.*world$', '^halo.*world$']);
select multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^halo.*world$', '^hello.*world$']);
-- multiFuzzyMatchAllIndices: the multi-index result is passed through arraySort.
select arraySort(multiFuzzyMatchAllIndices('halo some wrld', 2, ['some random string', '^halo.*world$', '^halo.*world$', '^halo.*world$', '^hallllo.*world$']));
select multiFuzzyMatchAllIndices('halo some wrld', 2, ['^halllllo.*world$', 'some random string']);
select [2, 3, 4] = arraySort(multiFuzzyMatchAllIndices('halo some wrld', 2, ['some random string', '^halo.*world$', '^halo.*world$', '^halo.*world$', '^hallllo.*world$']));
select [] = multiFuzzyMatchAllIndices('halo some wrld', 2, ['^halllllo.*world$', 'some random string']);
-- Section marker echoed into the test output; from here on the haystack and the pattern array are wrapped in materialize().
SELECT '- non-const pattern';
select multiFuzzyMatchAny(materialize('abc'), 0, materialize(['a1c'])) from system.numbers limit 3;
select multiFuzzyMatchAny(materialize('abc'), 1, materialize(['a1c'])) from system.numbers limit 3;
select multiFuzzyMatchAny(materialize('abc'), 2, materialize(['a1c'])) from system.numbers limit 3;
-- With this pattern, second-argument values 3 and 4 are expected to fail with server error 36.
select multiFuzzyMatchAny(materialize('abc'), 3, materialize(['a1c'])) from system.numbers limit 3; -- { serverError 36 }
select multiFuzzyMatchAny(materialize('abc'), 4, materialize(['a1c'])) from system.numbers limit 3; -- { serverError 36 }
-- Haystack and pattern array are both wrapped in materialize() in every statement below.
select multiFuzzyMatchAny(materialize('leftabcright'), 1, materialize(['a1c']));
select multiFuzzyMatchAny(materialize('hello some world'), 0, materialize(['^hello.*world$']));
select multiFuzzyMatchAny(materialize('hallo some world'), 1, materialize(['^hello.*world$']));
select multiFuzzyMatchAny(materialize('halo some wrld'), 2, materialize(['^hello.*world$']));
select multiFuzzyMatchAny(materialize('halo some wrld'), 2, materialize(['^hello.*world$', '^halo.*world$']));
select multiFuzzyMatchAny(materialize('halo some wrld'), 2, materialize(['^halo.*world$', '^hello.*world$']));
select multiFuzzyMatchAny(materialize('halo some wrld'), 3, materialize(['^hello.*world$']));
-- Second-argument values 10, -1 and 10000000000 are expected to fail with server errors 36, 43 and 44 respectively.
select multiFuzzyMatchAny(materialize('hello some world'), 10, materialize(['^hello.*world$'])); -- { serverError 36 }
select multiFuzzyMatchAny(materialize('hello some world'), -1, materialize(['^hello.*world$'])); -- { serverError 43 }
select multiFuzzyMatchAny(materialize('hello some world'), 10000000000, materialize(['^hello.*world$'])); -- { serverError 44 }
select multiFuzzyMatchAny(materialize('http://hyperscan_is_nice.de/st'), 2, materialize(['http://hyperscan_is_nice.de/(st\\d\\d$|st\\d\\d\\.|st1[0-4]\\d|st150|st\\d$|gl|rz|ch)']));
select multiFuzzyMatchAny(materialize('string'), 0, materialize(['zorro$', '^tring', 'in$', 'how.*', 'it{2}', 'works']));
select multiFuzzyMatchAny(materialize('string'), 1, materialize(['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']));
select multiFuzzyMatchAnyIndex(materialize('string'), 1, materialize(['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']));
select multiFuzzyMatchAnyIndex(materialize('halo some wrld'), 2, materialize(['^hello.*world$', '^halo.*world$']));
select multiFuzzyMatchAnyIndex(materialize('halo some wrld'), 2, materialize(['^halo.*world$', '^hello.*world$']));
-- multiFuzzyMatchAllIndices: the multi-index result is passed through arraySort.
select arraySort(multiFuzzyMatchAllIndices(materialize('halo some wrld'), 2, materialize(['some random string', '^halo.*world$', '^halo.*world$', '^halo.*world$', '^hallllo.*world$'])));
select multiFuzzyMatchAllIndices(materialize('halo some wrld'), 2, materialize(['^halllllo.*world$', 'some random string']));

View File

@ -1,3 +1,11 @@
- const pattern
1
1
[1]
1
1
[1]
- non-const pattern
1
1
[1]

View File

@ -3,6 +3,8 @@
-- Both hyperscan regexp length settings are set to 1 for the queries that follow.
set max_hyperscan_regexp_length = 1;
set max_hyperscan_regexp_total_length = 1;
-- Section marker echoed into the test output; the statements that follow pass the pattern array as a constant.
SELECT '- const pattern';
-- A single one-character pattern carries no error hint; one two-character pattern, or two one-character
-- patterns, are expected to fail with server error 36.
select multiMatchAny('123', ['1']);
select multiMatchAny('123', ['12']); -- { serverError 36 }
select multiMatchAny('123', ['1', '2']); -- { serverError 36 }
@ -26,3 +28,29 @@ select multiFuzzyMatchAnyIndex('123', 0, ['1', '2']); -- { serverError 36 }
select multiFuzzyMatchAllIndices('123', 0, ['1']);
select multiFuzzyMatchAllIndices('123', 0, ['12']); -- { serverError 36 }
select multiFuzzyMatchAllIndices('123', 0, ['1', '2']); -- { serverError 36 }
-- Section marker echoed into the test output; from here on the haystack and the pattern array are wrapped in materialize().
-- For each function, ['1'] carries no error hint while ['12'] and ['1', '2'] are expected to fail with server error 36.
SELECT '- non-const pattern';
select multiMatchAny(materialize('123'), materialize(['1']));
select multiMatchAny(materialize('123'), materialize(['12'])); -- { serverError 36 }
select multiMatchAny(materialize('123'), materialize(['1', '2'])); -- { serverError 36 }
select multiMatchAnyIndex(materialize('123'), materialize(['1']));
select multiMatchAnyIndex(materialize('123'), materialize(['12'])); -- { serverError 36 }
select multiMatchAnyIndex(materialize('123'), materialize(['1', '2'])); -- { serverError 36 }
select multiMatchAllIndices(materialize('123'), materialize(['1']));
select multiMatchAllIndices(materialize('123'), materialize(['12'])); -- { serverError 36 }
select multiMatchAllIndices(materialize('123'), materialize(['1', '2'])); -- { serverError 36 }
select multiFuzzyMatchAny(materialize('123'), 0, materialize(['1']));
select multiFuzzyMatchAny(materialize('123'), 0, materialize(['12'])); -- { serverError 36 }
select multiFuzzyMatchAny(materialize('123'), 0, materialize(['1', '2'])); -- { serverError 36 }
select multiFuzzyMatchAnyIndex(materialize('123'), 0, materialize(['1']));
select multiFuzzyMatchAnyIndex(materialize('123'), 0, materialize(['12'])); -- { serverError 36 }
select multiFuzzyMatchAnyIndex(materialize('123'), 0, materialize(['1', '2'])); -- { serverError 36 }
select multiFuzzyMatchAllIndices(materialize('123'), 0, materialize(['1']));
select multiFuzzyMatchAllIndices(materialize('123'), 0, materialize(['12'])); -- { serverError 36 }
select multiFuzzyMatchAllIndices(materialize('123'), 0, materialize(['1', '2'])); -- { serverError 36 }