mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Updated extractAllGroupsHorizontal - flexible limit on number of matches per row.
If it is not set via the third argument, it defaults to the previously hardcoded value of 1000.
This commit is contained in:
parent
32bd94a84b
commit
ec77ba8bfc
@ -55,7 +55,8 @@ public:
|
|||||||
|
|
||||||
String getName() const override { return name; }
|
String getName() const override { return name; }
|
||||||
|
|
||||||
size_t getNumberOfArguments() const override { return 2; }
|
size_t getNumberOfArguments() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL ? 0 : 2; }
|
||||||
|
bool isVariadic() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL; }
|
||||||
|
|
||||||
bool useDefaultImplementationForConstants() const override { return true; }
|
bool useDefaultImplementationForConstants() const override { return true; }
|
||||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||||
@ -66,7 +67,13 @@ public:
|
|||||||
{"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
|
{"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
|
||||||
{"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
|
{"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
|
||||||
};
|
};
|
||||||
validateFunctionArgumentTypes(*this, arguments, args);
|
FunctionArgumentDescriptors optional_args;
|
||||||
|
if constexpr (Kind == ExtractAllGroupsResultKind::HORIZONTAL)
|
||||||
|
{
|
||||||
|
optional_args.push_back(FunctionArgumentDescriptor{"max_matches_per_row", isUnsignedInteger, isColumnConst, "const Unsigned Int"});
|
||||||
|
}
|
||||||
|
|
||||||
|
validateFunctionArgumentTypes(*this, arguments, args, optional_args);
|
||||||
|
|
||||||
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
|
/// Two-dimensional array of strings, each `row` of top array represents matching groups.
|
||||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
|
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
|
||||||
@ -147,6 +154,10 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
|
||||||
|
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
|
||||||
|
const auto max_matches_per_row = arguments.size() >= 3 ? arguments[2].column->getUInt(0) : MAX_MATCHES_PER_ROW;
|
||||||
|
|
||||||
PODArray<StringPiece, 0> all_matches;
|
PODArray<StringPiece, 0> all_matches;
|
||||||
/// Number of times RE matched on each row of haystack column.
|
/// Number of times RE matched on each row of haystack column.
|
||||||
PODArray<size_t, 0> number_of_matches_per_row;
|
PODArray<size_t, 0> number_of_matches_per_row;
|
||||||
@ -172,16 +183,13 @@ public:
|
|||||||
for (size_t group = 1; group <= groups_count; ++group)
|
for (size_t group = 1; group <= groups_count; ++group)
|
||||||
all_matches.push_back(matched_groups[group]);
|
all_matches.push_back(matched_groups[group]);
|
||||||
|
|
||||||
/// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
|
++matches_per_row;
|
||||||
static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
|
if (matches_per_row > max_matches_per_row)
|
||||||
if (matches_per_row > MAX_MATCHES_PER_ROW)
|
|
||||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||||
"Too many matches per row (> {}) in the result of function {}",
|
"Too many matches per row (> {}) in the result of function {}",
|
||||||
MAX_MATCHES_PER_ROW, getName());
|
max_matches_per_row, getName());
|
||||||
|
|
||||||
pos = matched_groups[0].data() + std::max<size_t>(1, matched_groups[0].size());
|
pos = matched_groups[0].data() + std::max<size_t>(1, matched_groups[0].size());
|
||||||
|
|
||||||
++matches_per_row;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
number_of_matches_per_row.push_back(matches_per_row);
|
number_of_matches_per_row.push_back(matches_per_row);
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
0 groups, zero matches
|
|
||||||
1 group, multiple matches, String and FixedString
|
1 group, multiple matches, String and FixedString
|
||||||
[['hello','world']]
|
[['hello','world']]
|
||||||
[['hello','world']]
|
[['hello','world']]
|
||||||
|
@ -5,9 +5,13 @@ SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid arg
|
|||||||
SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
|
SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
|
||||||
SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
|
SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
|
||||||
SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
||||||
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 'foobar'); --{serverError 43} invalid argument type
|
||||||
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', materialize(10)); --{serverError 44} non-const max_matches_per_row
|
||||||
|
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||||
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 0); -- { serverError 128 } too many groups matched per row
|
||||||
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1); -- { serverError 128 } too many groups matched per row
|
||||||
|
|
||||||
SELECT '0 groups, zero matches';
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1000000000) FORMAT Null; -- users now can set limit bigger than previous 1000 matches per row
|
||||||
SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 }
|
|
||||||
|
|
||||||
SELECT '1 group, multiple matches, String and FixedString';
|
SELECT '1 group, multiple matches, String and FixedString';
|
||||||
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');
|
SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
0 groups, zero matches
|
|
||||||
1 group, multiple matches, String and FixedString
|
1 group, multiple matches, String and FixedString
|
||||||
[['hello'],['world']]
|
[['hello'],['world']]
|
||||||
[['hello'],['world']]
|
[['hello'],['world']]
|
||||||
|
@ -5,9 +5,8 @@ SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argum
|
|||||||
SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
|
SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
|
||||||
SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
|
SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
|
||||||
SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
|
||||||
|
SELECT extractAllGroupsVertical('hello world', '(\\w+)', 123); --{serverError 42} only 2 arguments
|
||||||
SELECT '0 groups, zero matches';
|
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } 0 groups
|
||||||
SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 }
|
|
||||||
|
|
||||||
SELECT '1 group, multiple matches, String and FixedString';
|
SELECT '1 group, multiple matches, String and FixedString';
|
||||||
SELECT extractAllGroupsVertical('hello world', '(\\w+)');
|
SELECT extractAllGroupsVertical('hello world', '(\\w+)');
|
||||||
|
Loading…
Reference in New Issue
Block a user