Use the regexp_max_matches_per_row setting instead of a 3rd argument to extractAllGroupsHorizontal()

This commit is contained in:
Vasily Nemkov 2021-07-30 12:20:02 +03:00
parent ec77ba8bfc
commit a1fb16df52
4 changed files with 17 additions and 19 deletions

View File

@ -475,6 +475,7 @@ class IColumn;
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard extractAllGroupsHorizontal() against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \

View File

@ -7,6 +7,8 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Core/Settings.h>
#include <memory>
#include <string>
@ -47,16 +49,21 @@ enum class ExtractAllGroupsResultKind
template <typename Impl>
class FunctionExtractAllGroups : public IFunction
{
ContextPtr context;
public:
static constexpr auto Kind = Impl::Kind;
static constexpr auto name = Impl::Name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionExtractAllGroups>(); }
FunctionExtractAllGroups(ContextPtr context_)
: context(context_)
{}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionExtractAllGroups>(context); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL ? 0 : 2; }
bool isVariadic() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
@ -67,13 +74,7 @@ public:
{"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
{"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
};
FunctionArgumentDescriptors optional_args;
if constexpr (Kind == ExtractAllGroupsResultKind::HORIZONTAL)
{
optional_args.push_back(FunctionArgumentDescriptor{"max_matches_per_row", isUnsignedInteger, isColumnConst, "const Unsigned Int"});
}
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
validateFunctionArgumentTypes(*this, arguments, args);
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
@ -154,9 +155,8 @@ public:
}
else
{
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
const auto max_matches_per_row = arguments.size() >= 3 ? arguments[2].column->getUInt(0) : MAX_MATCHES_PER_ROW;
/// Additional limit to fail fast on supposedly incorrect usage.
const auto max_matches_per_row = context->getSettingsRef().regexp_max_matches_per_row;
PODArray<StringPiece, 0> all_matches;
/// Number of times RE matched on each row of haystack column.

View File

@ -5,13 +5,11 @@ SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid arg
SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-const needle
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 'foobar'); --{serverError 43} invalid argument type
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', materialize(10)); --{serverError 44} non-const max_matches_per_row
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } 0 groups
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 0); -- { serverError 128 } too many groups matched per row
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1); -- { serverError 128 } too many groups matched per row
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 0; -- { serverError 128 } too many groups matched per row
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 1; -- { serverError 128 } too many groups matched per row
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1000000000) FORMAT Null; -- users can now set a limit higher than the previous 1000 matches per row
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 1000000 FORMAT Null; -- users can now set a limit higher than the previous 1000 matches per row
SELECT '1 group, multiple matches, String and FixedString';
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');

View File

@ -5,7 +5,6 @@ SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argum
SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
SELECT extractAllGroupsVertical('hello world', '(\\w+)', 123); --{serverError 42} only 2 arguments
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } 0 groups
SELECT '1 group, multiple matches, String and FixedString';