mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Add settings to check hyperscan regexp length.
This commit is contained in:
parent
14ef66e49b
commit
aa33a7add1
@ -377,6 +377,8 @@ class IColumn;
|
||||
M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \
|
||||
\
|
||||
M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_length, 0, "Max length of regexp that can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_total_length, 0, "Max total length of all regexps that can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
|
||||
M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
|
||||
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
|
||||
\
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -40,7 +41,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>();
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringFuzzySearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -113,6 +120,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -131,6 +141,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -53,7 +54,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringSearch>();
|
||||
return std::make_shared<FunctionsMultiStringSearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringSearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -105,6 +112,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -122,6 +132,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
29
src/Functions/hyperscanRegexpChecker.cpp
Normal file
29
src/Functions/hyperscanRegexpChecker.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 || max_hyperscan_regexp_total_length > 0)
|
||||
{
|
||||
size_t total_regexp_length = 0;
|
||||
for (const auto & pattern : refs)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 && pattern.size > max_hyperscan_regexp_length)
|
||||
throw Exception("Regexp length too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
total_regexp_length += pattern.size;
|
||||
}
|
||||
|
||||
if (max_hyperscan_regexp_total_length > 0 && total_regexp_length > max_hyperscan_regexp_total_length)
|
||||
throw Exception("Total regexp lengths too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
10
src/Functions/hyperscanRegexpChecker.h
Normal file
10
src/Functions/hyperscanRegexpChecker.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/StringRef.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Checks the lengths of the patterns in `refs` against the hyperscan limits.
/// Throws Exception with code BAD_ARGUMENTS if any single pattern is longer than
/// max_hyperscan_regexp_length, or if the sum of all pattern lengths is greater than
/// max_hyperscan_regexp_total_length. A limit equal to zero disables that check.
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length);
|
||||
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
1
|
||||
1
|
||||
[1]
|
||||
1
|
||||
1
|
||||
[1]
|
@ -0,0 +1,26 @@
|
||||
-- Both limits are set to 1 in this test: a single one-character pattern is accepted,
-- while a two-character pattern or two one-character patterns must fail with error 36.
set max_hyperscan_regexp_length = 1;
|
||||
set max_hyperscan_regexp_total_length = 1;
|
||||
|
||||
select multiMatchAny('123', ['1']);
|
||||
select multiMatchAny('123', ['12']); -- { serverError 36 }
|
||||
select multiMatchAny('123', ['1', '2']); -- { serverError 36 }
|
||||
|
||||
select multiMatchAnyIndex('123', ['1']);
|
||||
select multiMatchAnyIndex('123', ['12']); -- { serverError 36 }
|
||||
select multiMatchAnyIndex('123', ['1', '2']); -- { serverError 36 }
|
||||
|
||||
select multiMatchAllIndices('123', ['1']);
|
||||
select multiMatchAllIndices('123', ['12']); -- { serverError 36 }
|
||||
select multiMatchAllIndices('123', ['1', '2']); -- { serverError 36 }
|
||||
|
||||
select multiFuzzyMatchAny('123', 0, ['1']);
|
||||
select multiFuzzyMatchAny('123', 0, ['12']); -- { serverError 36 }
|
||||
select multiFuzzyMatchAny('123', 0, ['1', '2']); -- { serverError 36 }
|
||||
|
||||
select multiFuzzyMatchAnyIndex('123', 0, ['1']);
|
||||
select multiFuzzyMatchAnyIndex('123', 0, ['12']); -- { serverError 36 }
|
||||
select multiFuzzyMatchAnyIndex('123', 0, ['1', '2']); -- { serverError 36 }
|
||||
|
||||
select multiFuzzyMatchAllIndices('123', 0, ['1']);
|
||||
select multiFuzzyMatchAllIndices('123', 0, ['12']); -- { serverError 36 }
|
||||
select multiFuzzyMatchAllIndices('123', 0, ['1', '2']); -- { serverError 36 }
|
Loading…
Reference in New Issue
Block a user