Merge pull request #19393 from ClickHouse/extract-all-groups-fail-fast

Fail fast on incorrect usage of extractAllGroups
This commit is contained in:
alexey-milovidov 2021-01-22 17:24:13 +03:00 committed by GitHub
commit 3431e39587
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 4 deletions

View File

@ -14,12 +14,14 @@
#include <Core/iostream_debug_helpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int TOO_LARGE_ARRAY_SIZE;
}
@ -145,11 +147,11 @@ public:
}
else
{
std::vector<StringPiece> all_matches;
// number of times RE matched on each row of haystack column.
std::vector<size_t> number_of_matches_per_row;
PODArray<StringPiece, 0> all_matches;
/// Number of times RE matched on each row of haystack column.
PODArray<size_t, 0> number_of_matches_per_row;
// we expect RE to match multiple times on each row, `* 8` is arbitrary to reduce number of re-allocations.
/// We expect RE to match multiple times on each row, `* 8` is arbitrary to reduce number of re-allocations.
all_matches.reserve(input_rows_count * groups_count * 8);
number_of_matches_per_row.reserve(input_rows_count);
@ -170,6 +172,12 @@ public:
for (size_t group = 1; group <= groups_count; ++group)
all_matches.push_back(matched_groups[group]);
/// Additional limit to fail fast on supposedly incorrect usage.
static constexpr size_t MAX_GROUPS_PER_ROW = 1000000;
if (all_matches.size() > MAX_GROUPS_PER_ROW)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
pos = matched_groups[0].data() + std::max<size_t>(1, matched_groups[0].size());
++matches_per_row;

View File

@ -0,0 +1 @@
SELECT repeat('abcdefghijklmnopqrstuvwxyz', number * 100) AS haystack, extractAllGroupsHorizontal(haystack, '(\\w)') AS matches FROM numbers(1023); -- { serverError 128 }