Add settings to check hyperscan regexp length.

This commit is contained in:
Amos Bird 2021-08-07 13:07:41 +08:00
parent 14ef66e49b
commit aa33a7add1
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
7 changed files with 103 additions and 2 deletions

View File

@ -377,6 +377,8 @@ class IColumn;
M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \ M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \
\ \
M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \ M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \
M(UInt64, max_hyperscan_regexp_length, 0, "Max length of regexp that can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
M(UInt64, max_hyperscan_regexp_total_length, 0, "Max total length of all regexps that can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \ M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \ M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
\ \

View File

@ -10,6 +10,7 @@
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/hyperscanRegexpChecker.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <common/StringRef.h> #include <common/StringRef.h>
@ -40,7 +41,13 @@ public:
throw Exception( throw Exception(
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
return std::make_shared<FunctionsMultiStringFuzzySearch>(); return std::make_shared<FunctionsMultiStringFuzzySearch>(
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
}
FunctionsMultiStringFuzzySearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
{
} }
String getName() const override { return name; } String getName() const override { return name; }
@ -113,6 +120,9 @@ public:
for (const auto & el : src_arr) for (const auto & el : src_arr)
refs.emplace_back(el.get<String>()); refs.emplace_back(el.get<String>());
if (Impl::is_using_hyperscan)
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
auto col_res = ColumnVector<ResultType>::create(); auto col_res = ColumnVector<ResultType>::create();
auto col_offsets = ColumnArray::ColumnOffsets::create(); auto col_offsets = ColumnArray::ColumnOffsets::create();
@ -131,6 +141,10 @@ public:
else else
return col_res; return col_res;
} }
private:
size_t max_hyperscan_regexp_length;
size_t max_hyperscan_regexp_total_length;
}; };
} }

View File

@ -10,6 +10,7 @@
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/hyperscanRegexpChecker.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <common/StringRef.h> #include <common/StringRef.h>
@ -53,7 +54,13 @@ public:
throw Exception( throw Exception(
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
return std::make_shared<FunctionsMultiStringSearch>(); return std::make_shared<FunctionsMultiStringSearch>(
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
}
FunctionsMultiStringSearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
{
} }
String getName() const override { return name; } String getName() const override { return name; }
@ -105,6 +112,9 @@ public:
for (const auto & el : src_arr) for (const auto & el : src_arr)
refs.emplace_back(el.get<String>()); refs.emplace_back(el.get<String>());
if (Impl::is_using_hyperscan)
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
auto col_res = ColumnVector<ResultType>::create(); auto col_res = ColumnVector<ResultType>::create();
auto col_offsets = ColumnArray::ColumnOffsets::create(); auto col_offsets = ColumnArray::ColumnOffsets::create();
@ -122,6 +132,10 @@ public:
else else
return col_res; return col_res;
} }
private:
size_t max_hyperscan_regexp_length;
size_t max_hyperscan_regexp_total_length;
}; };
} }

View File

@ -0,0 +1,29 @@
#include <Functions/hyperscanRegexpChecker.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length)
{
if (max_hyperscan_regexp_length > 0 || max_hyperscan_regexp_total_length > 0)
{
size_t total_regexp_length = 0;
for (const auto & pattern : refs)
{
if (max_hyperscan_regexp_length > 0 && pattern.size > max_hyperscan_regexp_length)
throw Exception("Regexp length too large", ErrorCodes::BAD_ARGUMENTS);
total_regexp_length += pattern.size;
}
if (max_hyperscan_regexp_total_length > 0 && total_regexp_length > max_hyperscan_regexp_total_length)
throw Exception("Total regexp lengths too large", ErrorCodes::BAD_ARGUMENTS);
}
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <cstddef>
#include <vector>
#include <common/StringRef.h>
namespace DB
{
/** Checks the sizes of regexp patterns against the configured limits.
  *
  * refs                              - patterns to check; only their sizes are inspected.
  * max_hyperscan_regexp_length       - maximum allowed size of a single pattern; 0 means no limit.
  * max_hyperscan_regexp_total_length - maximum allowed sum of all pattern sizes; 0 means no limit.
  *
  * Throws Exception with ErrorCodes::BAD_ARGUMENTS if any enabled limit is exceeded.
  */
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length);
}

View File

@ -0,0 +1,6 @@
1
1
[1]
1
1
[1]

View File

@ -0,0 +1,26 @@
-- Both limits are set to 1, so only a single one-character pattern fits within them.
set max_hyperscan_regexp_length = 1;
set max_hyperscan_regexp_total_length = 1;
-- Each function below is exercised with the same three pattern sets:
--   ['1']      - one pattern of length 1, within both limits, so the query succeeds;
--   ['12']     - one pattern of length 2, above the per-pattern limit, so server error code 36 is expected;
--   ['1', '2'] - two patterns of length 1 (total 2), above the total limit, so server error code 36 is expected.
-- Exact multi-match functions.
select multiMatchAny('123', ['1']);
select multiMatchAny('123', ['12']); -- { serverError 36 }
select multiMatchAny('123', ['1', '2']); -- { serverError 36 }
select multiMatchAnyIndex('123', ['1']);
select multiMatchAnyIndex('123', ['12']); -- { serverError 36 }
select multiMatchAnyIndex('123', ['1', '2']); -- { serverError 36 }
select multiMatchAllIndices('123', ['1']);
select multiMatchAllIndices('123', ['12']); -- { serverError 36 }
select multiMatchAllIndices('123', ['1', '2']); -- { serverError 36 }
-- Fuzzy multi-match functions; the second argument is 0 (presumably the edit distance - confirm against the function documentation).
select multiFuzzyMatchAny('123', 0, ['1']);
select multiFuzzyMatchAny('123', 0, ['12']); -- { serverError 36 }
select multiFuzzyMatchAny('123', 0, ['1', '2']); -- { serverError 36 }
select multiFuzzyMatchAnyIndex('123', 0, ['1']);
select multiFuzzyMatchAnyIndex('123', 0, ['12']); -- { serverError 36 }
select multiFuzzyMatchAnyIndex('123', 0, ['1', '2']); -- { serverError 36 }
select multiFuzzyMatchAllIndices('123', 0, ['1']);
select multiFuzzyMatchAllIndices('123', 0, ['12']); -- { serverError 36 }
select multiFuzzyMatchAllIndices('123', 0, ['1', '2']); -- { serverError 36 }