mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Move check for regexp array size into implementations
- This is not needed for non-const regexp array arguments (the cardinality of arrays is fixed per column), but it cleans up the code and runs the check only in functions which have restrictions on the number of patterns. - For functions using hyperscan, it was checked that the number of regexes is < 2^32. Removed the check because I don't think anyone will ever specify 4 billion patterns.
This commit is contained in:
parent
7913edc172
commit
3478db9fb6
@ -21,16 +21,13 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int FUNCTION_NOT_ALLOWED;
|
||||
}
|
||||
|
||||
|
||||
template <typename Impl, size_t LimitArgs>
|
||||
template <typename Impl>
|
||||
class FunctionsMultiStringFuzzySearch : public IFunction
|
||||
{
|
||||
static_assert(LimitArgs > 0);
|
||||
|
||||
public:
|
||||
static constexpr auto name = Impl::name;
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
@ -96,11 +93,6 @@ public:
|
||||
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
if (src_arr.size() > LimitArgs)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at most {}",
|
||||
getName(), std::to_string(src_arr.size()), std::to_string(LimitArgs));
|
||||
|
||||
std::vector<std::string_view> refs;
|
||||
refs.reserve(src_arr.size());
|
||||
|
||||
|
@ -36,18 +36,13 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int FUNCTION_NOT_ALLOWED;
|
||||
}
|
||||
|
||||
|
||||
/// The argument limit arises from the Volnitsky searcher -- it is crucial for performance to store the pattern number in only one byte.
|
||||
/// But some other searchers use this function, for example, multiMatchAny -- hyperscan does not have such restrictions
|
||||
template <typename Impl, size_t LimitArgs = std::numeric_limits<UInt8>::max()>
|
||||
template <typename Impl>
|
||||
class FunctionsMultiStringSearch : public IFunction
|
||||
{
|
||||
static_assert(LimitArgs > 0);
|
||||
|
||||
public:
|
||||
static constexpr auto name = Impl::name;
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
@ -97,12 +92,6 @@ public:
|
||||
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
if (src_arr.size() > LimitArgs)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at most {}",
|
||||
getName(), std::to_string(src_arr.size()), std::to_string(LimitArgs));
|
||||
|
||||
std::vector<std::string_view> refs;
|
||||
refs.reserve(src_arr.size());
|
||||
|
||||
|
@ -7,6 +7,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
template <typename Name, typename Impl>
|
||||
struct MultiSearchFirstIndexImpl
|
||||
{
|
||||
@ -28,6 +33,12 @@ struct MultiSearchFirstIndexImpl
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
{
|
||||
// For performance of Volnitsky search, it is crucial to save only one byte for pattern number.
|
||||
if (needles.size() > std::numeric_limits<UInt8>::max())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at most {}",
|
||||
name, std::to_string(needles.size()), std::to_string(std::numeric_limits<UInt8>::max()));
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
res.resize(haystack_string_size);
|
||||
|
@ -7,6 +7,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
template <typename Name, typename Impl>
|
||||
struct MultiSearchImpl
|
||||
{
|
||||
@ -28,6 +33,12 @@ struct MultiSearchImpl
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
{
|
||||
// For performance of Volnitsky search, it is crucial to save only one byte for pattern number.
|
||||
if (needles.size() > std::numeric_limits<UInt8>::max())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at most {}",
|
||||
name, std::to_string(needles.size()), std::to_string(std::numeric_limits<UInt8>::max()));
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
res.resize(haystack_string_size);
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiFuzzyMatchAllIndices
|
||||
static constexpr auto name = "multiFuzzyMatchAllIndices";
|
||||
};
|
||||
|
||||
using FunctionMultiFuzzyMatchAllIndices = FunctionsMultiStringFuzzySearch<
|
||||
MultiMatchAllIndicesImpl<NameMultiFuzzyMatchAllIndices, UInt64, true>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiFuzzyMatchAllIndices = FunctionsMultiStringFuzzySearch<MultiMatchAllIndicesImpl<NameMultiFuzzyMatchAllIndices, UInt64, true>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiFuzzyMatchAny
|
||||
static constexpr auto name = "multiFuzzyMatchAny";
|
||||
};
|
||||
|
||||
using FunctionMultiFuzzyMatchAny = FunctionsMultiStringFuzzySearch<
|
||||
MultiMatchAnyImpl<NameMultiFuzzyMatchAny, UInt8, true, false, true>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiFuzzyMatchAny = FunctionsMultiStringFuzzySearch<MultiMatchAnyImpl<NameMultiFuzzyMatchAny, UInt8, true, false, true>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiFuzzyMatchAnyIndex
|
||||
static constexpr auto name = "multiFuzzyMatchAnyIndex";
|
||||
};
|
||||
|
||||
using FunctionMultiFuzzyMatchAnyIndex = FunctionsMultiStringFuzzySearch<
|
||||
MultiMatchAnyImpl<NameMultiFuzzyMatchAnyIndex, UInt64, false, true, true>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiFuzzyMatchAnyIndex = FunctionsMultiStringFuzzySearch<MultiMatchAnyImpl<NameMultiFuzzyMatchAnyIndex, UInt64, false, true, true>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiMatchAllIndices
|
||||
static constexpr auto name = "multiMatchAllIndices";
|
||||
};
|
||||
|
||||
using FunctionMultiMatchAllIndices = FunctionsMultiStringSearch<
|
||||
MultiMatchAllIndicesImpl<NameMultiMatchAllIndices, UInt64, false>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiMatchAllIndices = FunctionsMultiStringSearch<MultiMatchAllIndicesImpl<NameMultiMatchAllIndices, UInt64, false>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiMatchAny
|
||||
static constexpr auto name = "multiMatchAny";
|
||||
};
|
||||
|
||||
using FunctionMultiMatchAny = FunctionsMultiStringSearch<
|
||||
MultiMatchAnyImpl<NameMultiMatchAny, UInt8, true, false, false>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiMatchAny = FunctionsMultiStringSearch<MultiMatchAnyImpl<NameMultiMatchAny, UInt8, true, false, false>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,9 +13,7 @@ struct NameMultiMatchAnyIndex
|
||||
static constexpr auto name = "multiMatchAnyIndex";
|
||||
};
|
||||
|
||||
using FunctionMultiMatchAnyIndex = FunctionsMultiStringSearch<
|
||||
MultiMatchAnyImpl<NameMultiMatchAnyIndex, UInt64, false, true, false>,
|
||||
std::numeric_limits<UInt32>::max()>;
|
||||
using FunctionMultiMatchAnyIndex = FunctionsMultiStringSearch<MultiMatchAnyImpl<NameMultiMatchAnyIndex, UInt64, false, true, false>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,8 +13,7 @@ struct NameMultiSearchAnyCaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "multiSearchAnyCaseInsensitive";
|
||||
};
|
||||
using FunctionMultiSearchCaseInsensitive
|
||||
= FunctionsMultiStringSearch<MultiSearchImpl<NameMultiSearchAnyCaseInsensitive, PositionCaseInsensitiveASCII>>;
|
||||
using FunctionMultiSearchCaseInsensitive = FunctionsMultiStringSearch<MultiSearchImpl<NameMultiSearchAnyCaseInsensitive, PositionCaseInsensitiveASCII>>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -14,8 +14,7 @@ struct NameMultiSearchFirstIndex
|
||||
static constexpr auto name = "multiSearchFirstIndex";
|
||||
};
|
||||
|
||||
using FunctionMultiSearchFirstIndex
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<NameMultiSearchFirstIndex, PositionCaseSensitiveASCII>>;
|
||||
using FunctionMultiSearchFirstIndex = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<NameMultiSearchFirstIndex, PositionCaseSensitiveASCII>>;
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user