Merge pull request #55947 from Algunenano/topk_generic_memory

Prevent excesive memory usage when deserializing AggregateFunctionTopKGenericData
This commit is contained in:
robot-clickhouse-ci-1 2023-10-23 22:27:54 +02:00 committed by GitHub
commit 2346f30095
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 7 deletions

View File

@ -8,9 +8,6 @@
#include <DataTypes/DataTypeIPv4andIPv6.h> #include <DataTypes/DataTypeIPv4andIPv6.h>
static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF;
namespace DB namespace DB
{ {
@ -134,9 +131,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]); threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE) if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, throw Exception(
"Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE)); ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})",
name,
toString(DB::TOP_K_MAX_SIZE));
if (threshold == 0) if (threshold == 0)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name); throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name);

View File

@ -20,6 +20,12 @@ namespace DB
{ {
struct Settings; struct Settings;
static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
}
template <typename T> template <typename T>
struct AggregateFunctionTopKData struct AggregateFunctionTopKData
@ -163,11 +169,18 @@ public:
{ {
auto & set = this->data(place).value; auto & set = this->data(place).value;
set.clear(); set.clear();
set.resize(reserved);
// Specialized here because there's no deserialiser for StringRef // Specialized here because there's no deserialiser for StringRef
size_t size = 0; size_t size = 0;
readVarUInt(size, buf); readVarUInt(size, buf);
if (unlikely(size > TOP_K_MAX_SIZE))
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large size ({}) for aggregate function '{}' state (maximum is {})",
size,
getName(),
TOP_K_MAX_SIZE);
set.resize(size);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
auto ref = readStringBinaryInto(*arena, buf); auto ref = readStringBinaryInto(*arena, buf);

View File

@ -0,0 +1,9 @@
-- Tags: no-fasttest
-- https://github.com/ClickHouse/ClickHouse/issues/49706
-- Using format Parquet for convenience so it errors out without output (but still deserializes the output)
-- Without the fix this would OOM the client when deserializing the state
SELECT
topKResampleState(1048576, 257, 65536, 10)(toString(number), number)
FROM numbers(3)
FORMAT Parquet; -- { clientError UNKNOWN_TYPE }