Move the check whether the configuration allows hyperscan into the implementations

- This is not needed for non-const regexp array arguments but cleans up
  the code and runs the check only in functions which actually use
  hyperscan.
This commit is contained in:
Robert Schulze 2022-06-25 17:53:11 +02:00
parent c2cea38b97
commit e2b11899a1
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
7 changed files with 34 additions and 25 deletions

View File

@ -30,18 +30,17 @@ class FunctionsMultiStringFuzzySearch : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(ContextPtr context)
{
const auto & settings = context->getSettingsRef();
if (Impl::is_using_hyperscan && !settings.allow_hyperscan)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
return std::make_shared<FunctionsMultiStringFuzzySearch>(settings.max_hyperscan_regexp_length, settings.max_hyperscan_regexp_total_length);
return std::make_shared<FunctionsMultiStringFuzzySearch>(settings.allow_hyperscan, settings.max_hyperscan_regexp_length, settings.max_hyperscan_regexp_total_length);
}
FunctionsMultiStringFuzzySearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
FunctionsMultiStringFuzzySearch(bool allow_hyperscan_, size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: allow_hyperscan(allow_hyperscan_)
, max_hyperscan_regexp_length(max_hyperscan_regexp_length_)
, max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
{}
String getName() const override { return name; }
@ -108,7 +107,7 @@ public:
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res, edit_distance,
max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
if constexpr (Impl::is_column_array)
return ColumnArray::create(std::move(col_res), std::move(col_offsets));
@ -117,6 +116,7 @@ public:
}
private:
const bool allow_hyperscan;
const size_t max_hyperscan_regexp_length;
const size_t max_hyperscan_regexp_total_length;
};

View File

@ -45,18 +45,17 @@ class FunctionsMultiStringSearch : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(ContextPtr context)
{
const auto & settings = context->getSettingsRef();
if (Impl::is_using_hyperscan && !settings.allow_hyperscan)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
return std::make_shared<FunctionsMultiStringSearch>(settings.max_hyperscan_regexp_length, settings.max_hyperscan_regexp_total_length);
return std::make_shared<FunctionsMultiStringSearch>(settings.allow_hyperscan, settings.max_hyperscan_regexp_length, settings.max_hyperscan_regexp_total_length);
}
FunctionsMultiStringSearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
FunctionsMultiStringSearch(bool allow_hyperscan_, size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: allow_hyperscan(allow_hyperscan_)
, max_hyperscan_regexp_length(max_hyperscan_regexp_length_)
, max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
{}
String getName() const override { return name; }
@ -108,7 +107,7 @@ public:
Impl::vectorConstant(
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res,
max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
if constexpr (Impl::is_column_array)
return ColumnArray::create(std::move(col_res), std::move(col_offsets));
@ -117,6 +116,7 @@ public:
}
private:
const bool allow_hyperscan;
const size_t max_hyperscan_regexp_length;
const size_t max_hyperscan_regexp_total_length;
};

View File

@ -20,8 +20,9 @@ namespace DB
namespace ErrorCodes
{
extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int FUNCTION_NOT_ALLOWED;
extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
extern const int NOT_IMPLEMENTED;
extern const int TOO_MANY_BYTES;
}
@ -32,7 +33,6 @@ struct MultiMatchAllIndicesImpl
{
using ResultType = ResultType_;
static constexpr bool is_using_hyperscan = true;
/// Variable for understanding, if we used offsets for the output, most
/// likely to determine whether the function returns ColumnVector or ColumnArray.
static constexpr bool is_column_array = true;
@ -49,10 +49,11 @@ struct MultiMatchAllIndicesImpl
const std::vector<std::string_view> & needles,
PaddedPODArray<ResultType> & res,
PaddedPODArray<UInt64> & offsets,
bool allow_hyperscan,
size_t max_hyperscan_regexp_length,
size_t max_hyperscan_regexp_total_length)
{
vectorConstant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
vectorConstant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
static void vectorConstant(
@ -62,9 +63,12 @@ struct MultiMatchAllIndicesImpl
[[maybe_unused]] PaddedPODArray<ResultType> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
[[maybe_unused]] std::optional<UInt32> edit_distance,
bool allow_hyperscan,
[[maybe_unused]] size_t max_hyperscan_regexp_length,
[[maybe_unused]] size_t max_hyperscan_regexp_total_length)
{
if (!allow_hyperscan)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
#if USE_VECTORSCAN
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);

View File

@ -21,8 +21,9 @@ namespace DB
namespace ErrorCodes
{
extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int FUNCTION_NOT_ALLOWED;
extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
extern const int NOT_IMPLEMENTED;
extern const int TOO_MANY_BYTES;
}
@ -45,7 +46,6 @@ struct MultiMatchAnyImpl
static constexpr bool FindAny = (Find == MultiMatchTraits::Find::Any);
static constexpr bool FindAnyIndex = (Find == MultiMatchTraits::Find::AnyIndex);
static constexpr bool is_using_hyperscan = true;
/// Variable for understanding, if we used offsets for the output, most
/// likely to determine whether the function returns ColumnVector or ColumnArray.
static constexpr bool is_column_array = false;
@ -62,10 +62,11 @@ struct MultiMatchAnyImpl
const std::vector<std::string_view> & needles,
PaddedPODArray<ResultType> & res,
PaddedPODArray<UInt64> & offsets,
bool allow_hyperscan,
size_t max_hyperscan_regexp_length,
size_t max_hyperscan_regexp_total_length)
{
vectorConstant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
vectorConstant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
}
static void vectorConstant(
@ -75,9 +76,13 @@ struct MultiMatchAnyImpl
PaddedPODArray<ResultType> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
[[maybe_unused]] std::optional<UInt32> edit_distance,
bool allow_hyperscan,
size_t max_hyperscan_regexp_length,
size_t max_hyperscan_regexp_total_length)
{
if (!allow_hyperscan)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
res.resize(haystack_offsets.size());

View File

@ -16,7 +16,6 @@ template <typename Name, typename Impl>
struct MultiSearchFirstIndexImpl
{
using ResultType = UInt64;
static constexpr bool is_using_hyperscan = false;
/// Variable for understanding, if we used offsets for the output, most
/// likely to determine whether the function returns ColumnVector or ColumnArray.
static constexpr bool is_column_array = false;
@ -30,6 +29,7 @@ struct MultiSearchFirstIndexImpl
const std::vector<std::string_view> & needles,
PaddedPODArray<UInt64> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
bool /*allow_hyperscan*/,
size_t /*max_hyperscan_regexp_length*/,
size_t /*max_hyperscan_regexp_total_length*/)
{

View File

@ -16,7 +16,6 @@ template <typename Name, typename Impl>
struct MultiSearchFirstPositionImpl
{
using ResultType = UInt64;
static constexpr bool is_using_hyperscan = false;
/// Variable for understanding, if we used offsets for the output, most
/// likely to determine whether the function returns ColumnVector or ColumnArray.
static constexpr bool is_column_array = false;
@ -30,6 +29,7 @@ struct MultiSearchFirstPositionImpl
const std::vector<std::string_view> & needles,
PaddedPODArray<UInt64> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
bool /*allow_hyperscan*/,
size_t /*max_hyperscan_regexp_length*/,
size_t /*max_hyperscan_regexp_total_length*/)
{

View File

@ -16,7 +16,6 @@ template <typename Name, typename Impl>
struct MultiSearchImpl
{
using ResultType = UInt8;
static constexpr bool is_using_hyperscan = false;
/// Variable for understanding, if we used offsets for the output, most
/// likely to determine whether the function returns ColumnVector or ColumnArray.
static constexpr bool is_column_array = false;
@ -30,6 +29,7 @@ struct MultiSearchImpl
const std::vector<std::string_view> & needles,
PaddedPODArray<UInt8> & res,
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
bool /*allow_hyperscan*/,
size_t /*max_hyperscan_regexp_length*/,
size_t /*max_hyperscan_regexp_total_length*/)
{