mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
setting regexp_max_matches_per_row instead of 3rd argument
This commit is contained in:
parent
ec77ba8bfc
commit
a1fb16df52
@ -475,6 +475,7 @@ class IColumn;
|
||||
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
|
||||
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
|
||||
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
|
||||
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard extractAllGroupsHorizontal() against consuming too much memory with greedy RE.", 0) \
|
||||
\
|
||||
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
|
||||
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/Regexps.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@ -47,16 +49,21 @@ enum class ExtractAllGroupsResultKind
|
||||
template <typename Impl>
|
||||
class FunctionExtractAllGroups : public IFunction
|
||||
{
|
||||
ContextPtr context;
|
||||
|
||||
public:
|
||||
static constexpr auto Kind = Impl::Kind;
|
||||
static constexpr auto name = Impl::Name;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionExtractAllGroups>(); }
|
||||
FunctionExtractAllGroups(ContextPtr context_)
|
||||
: context(context_)
|
||||
{}
|
||||
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionExtractAllGroups>(context); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL ? 0 : 2; }
|
||||
bool isVariadic() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL; }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
@ -67,13 +74,7 @@ public:
|
||||
{"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
|
||||
{"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
|
||||
};
|
||||
FunctionArgumentDescriptors optional_args;
|
||||
if constexpr (Kind == ExtractAllGroupsResultKind::HORIZONTAL)
|
||||
{
|
||||
optional_args.push_back(FunctionArgumentDescriptor{"max_matches_per_row", isUnsignedInteger, isColumnConst, "const Unsigned Int"});
|
||||
}
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
|
||||
@ -154,9 +155,8 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
|
||||
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
|
||||
const auto max_matches_per_row = arguments.size() >= 3 ? arguments[2].column->getUInt(0) : MAX_MATCHES_PER_ROW;
|
||||
/// Additional limit to fail fast on supposedly incorrect usage.
|
||||
const auto max_matches_per_row = context->getSettingsRef().regexp_max_matches_per_row;
|
||||
|
||||
PODArray<StringPiece, 0> all_matches;
|
||||
/// Number of times RE matched on each row of haystack column.
|
||||
|
@ -5,13 +5,11 @@ SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid arg
|
||||
SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
|
||||
SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 'foobar'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', materialize(10)); --{serverError 44} non-const max_matches_per_row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 0); -- { serverError 128 } too many groups matched per row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1); -- { serverError 128 } too many groups matched per row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 0; -- { serverError 128 } too many groups matched per row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 1; -- { serverError 128 } too many groups matched per row
|
||||
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1000000000) FORMAT Null; -- users now can set limit bigger than previous 1000 matches per row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)') SETTINGS regexp_max_matches_per_row = 1000000 FORMAT Null; -- users now can set limit bigger than previous 1000 matches per row
|
||||
|
||||
SELECT '1 group, multiple matches, String and FixedString';
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');
|
||||
|
@ -5,7 +5,6 @@ SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argum
|
||||
SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
|
||||
SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
||||
SELECT extractAllGroupsVertical('hello world', '(\\w+)', 123); --{serverError 42} only 2 arguments
|
||||
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||
|
||||
SELECT '1 group, multiple matches, String and FixedString';
|
||||
|
Loading…
Reference in New Issue
Block a user