mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Updated extractAllGroupsHorizontal - flexible limit on number of matches per row.
If it is not set via the third argument, it defaults to the previously hardcoded value of 1000.
This commit is contained in:
parent
32bd94a84b
commit
ec77ba8bfc
@ -55,7 +55,8 @@ public:
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
size_t getNumberOfArguments() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL ? 0 : 2; }
|
||||
bool isVariadic() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
@ -66,7 +67,13 @@ public:
|
||||
{"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
|
||||
{"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
FunctionArgumentDescriptors optional_args;
|
||||
if constexpr (Kind == ExtractAllGroupsResultKind::HORIZONTAL)
|
||||
{
|
||||
optional_args.push_back(FunctionArgumentDescriptor{"max_matches_per_row", isUnsignedInteger, isColumnConst, "const Unsigned Int"});
|
||||
}
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
|
||||
|
||||
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
|
||||
@ -147,6 +154,10 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
|
||||
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
|
||||
const auto max_matches_per_row = arguments.size() >= 3 ? arguments[2].column->getUInt(0) : MAX_MATCHES_PER_ROW;
|
||||
|
||||
PODArray<StringPiece, 0> all_matches;
|
||||
/// Number of times RE matched on each row of haystack column.
|
||||
PODArray<size_t, 0> number_of_matches_per_row;
|
||||
@ -172,16 +183,13 @@ public:
|
||||
for (size_t group = 1; group <= groups_count; ++group)
|
||||
all_matches.push_back(matched_groups[group]);
|
||||
|
||||
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
|
||||
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
|
||||
if (matches_per_row > MAX_MATCHES_PER_ROW)
|
||||
++matches_per_row;
|
||||
if (matches_per_row > max_matches_per_row)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too many matches per row (> {}) in the result of function {}",
|
||||
MAX_MATCHES_PER_ROW, getName());
|
||||
max_matches_per_row, getName());
|
||||
|
||||
pos = matched_groups[0].data() + std::max<size_t>(1, matched_groups[0].size());
|
||||
|
||||
++matches_per_row;
|
||||
}
|
||||
|
||||
number_of_matches_per_row.push_back(matches_per_row);
|
||||
|
@ -1,4 +1,3 @@
|
||||
0 groups, zero matches
|
||||
1 group, multiple matches, String and FixedString
|
||||
[['hello','world']]
|
||||
[['hello','world']]
|
||||
|
@ -5,9 +5,13 @@ SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid arg
|
||||
SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
|
||||
SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-cons needle
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 'foobar'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', materialize(10)); --{serverError 44} non-const max_matches_per_row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 0); -- { serverError 128 } to many groups matched per row
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1); -- { serverError 128 } to many groups matched per row
|
||||
|
||||
SELECT '0 groups, zero matches';
|
||||
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 }
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1000000000) FORMAT Null; -- users now can set limit bigger than previous 1000 matches per row
|
||||
|
||||
SELECT '1 group, multiple matches, String and FixedString';
|
||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');
|
||||
|
@ -1,4 +1,3 @@
|
||||
0 groups, zero matches
|
||||
1 group, multiple matches, String and FixedString
|
||||
[['hello'],['world']]
|
||||
[['hello'],['world']]
|
||||
|
@ -5,9 +5,8 @@ SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argum
|
||||
SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
|
||||
SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
|
||||
SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
||||
|
||||
SELECT '0 groups, zero matches';
|
||||
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 }
|
||||
SELECT extractAllGroupsVertical('hello world', '(\\w+)', 123); --{serverError 42} only 2 arguments
|
||||
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||
|
||||
SELECT '1 group, multiple matches, String and FixedString';
|
||||
SELECT extractAllGroupsVertical('hello world', '(\\w+)');
|
||||
|
Loading…
Reference in New Issue
Block a user