From ec77ba8bfcaa506a82d0bb4b6ad94bcf2146c1aa Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 29 Jul 2021 15:36:55 +0300 Subject: [PATCH] Updated extractAllGroupsHorizontal - flexible limit on number of matches per row. If it is not set via third argument, it deafults to previously hardcoded value 1000. --- src/Functions/extractAllGroups.h | 24 ++++++++++++------- ...01246_extractAllGroupsHorizontal.reference | 1 - .../01246_extractAllGroupsHorizontal.sql | 8 +++++-- .../01246_extractAllGroupsVertical.reference | 1 - .../01246_extractAllGroupsVertical.sql | 5 ++-- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index 864a788cf18..5323d8ba9b0 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -55,7 +55,8 @@ public: String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } + size_t getNumberOfArguments() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL ? 0 : 2; } + bool isVariadic() const override { return Kind == ExtractAllGroupsResultKind::HORIZONTAL; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } @@ -66,7 +67,13 @@ public: {"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"}, {"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + FunctionArgumentDescriptors optional_args; + if constexpr (Kind == ExtractAllGroupsResultKind::HORIZONTAL) + { + optional_args.push_back(FunctionArgumentDescriptor{"max_matches_per_row", isUnsignedInteger, isColumnConst, "const Unsigned Int"}); + } + + validateFunctionArgumentTypes(*this, arguments, args, optional_args); /// Two-dimensional array of strings, each `row` of top array represents matching groups. return std::make_shared(std::make_shared(std::make_shared())); @@ -147,6 +154,10 @@ public: } else { + /// Additional limit to fail fast on supposedly incorrect usage, arbitrary value. + static constexpr size_t MAX_MATCHES_PER_ROW = 1000; + const auto max_matches_per_row = arguments.size() >= 3 ? arguments[2].column->getUInt(0) : MAX_MATCHES_PER_ROW; + PODArray all_matches; /// Number of times RE matched on each row of haystack column. PODArray number_of_matches_per_row; @@ -172,16 +183,13 @@ public: for (size_t group = 1; group <= groups_count; ++group) all_matches.push_back(matched_groups[group]); - /// Additional limit to fail fast on supposedly incorrect usage, arbitrary value. - static constexpr size_t MAX_MATCHES_PER_ROW = 1000; - if (matches_per_row > MAX_MATCHES_PER_ROW) + ++matches_per_row; + if (matches_per_row > max_matches_per_row) throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too many matches per row (> {}) in the result of function {}", - MAX_MATCHES_PER_ROW, getName()); + max_matches_per_row, getName()); pos = matched_groups[0].data() + std::max(1, matched_groups[0].size()); - - ++matches_per_row; } number_of_matches_per_row.push_back(matches_per_row); diff --git a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference index 13e717485d8..2aed6b7a3c0 100644 --- a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference +++ b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference @@ -1,4 +1,3 @@ -0 groups, zero matches 1 group, multiple matches, String and FixedString [['hello','world']] [['hello','world']] diff --git a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql index b7a71415a9d..ef1159e27d4 100644 --- a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql +++ b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql @@ -5,9 +5,13 @@ SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid arg SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-cons needle +SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 'foobar'); --{serverError 43} invalid argument type +SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', materialize(10)); --{serverError 44} non-const max_matches_per_row +SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } 0 groups +SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 0); -- { serverError 128 } to many groups matched per row +SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1); -- { serverError 128 } to many groups matched per row -SELECT '0 groups, zero matches'; -SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 } +SELECT extractAllGroupsHorizontal('hello world', '(\\w+)', 1000000000) FORMAT Null; -- users now can set limit bigger than previous 1000 matches per row SELECT '1 group, multiple matches, String and FixedString'; SELECT extractAllGroupsHorizontal('hello world', '(\\w+)'); diff --git a/tests/queries/0_stateless/01246_extractAllGroupsVertical.reference b/tests/queries/0_stateless/01246_extractAllGroupsVertical.reference index 983e3838ee5..80b0bf2884d 100644 --- a/tests/queries/0_stateless/01246_extractAllGroupsVertical.reference +++ b/tests/queries/0_stateless/01246_extractAllGroupsVertical.reference @@ -1,4 +1,3 @@ -0 groups, zero matches 1 group, multiple matches, String and FixedString [['hello'],['world']] [['hello'],['world']] diff --git a/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql b/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql index 8edc3f3e741..aff3dc196df 100644 --- a/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql +++ b/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql @@ -5,9 +5,8 @@ SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argum SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle - -SELECT '0 groups, zero matches'; -SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } +SELECT extractAllGroupsVertical('hello world', '(\\w+)', 123); --{serverError 42} only 2 arguments +SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 } 0 groups SELECT '1 group, multiple matches, String and FixedString'; SELECT extractAllGroupsVertical('hello world', '(\\w+)');