From 0cd79cb7c01720b2abfd79157c0feca5f696059f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 23 Oct 2023 16:11:16 +0200 Subject: [PATCH 1/2] Prevent excesive memory usage when deserializing AggregateFunctionTopKGenericData --- src/AggregateFunctions/AggregateFunctionTopK.cpp | 12 ++++++------ src/AggregateFunctions/AggregateFunctionTopK.h | 15 ++++++++++++++- ...2_topKGeneric_deserialization_memory.reference | 0 .../02902_topKGeneric_deserialization_memory.sql | 7 +++++++ 4 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.reference create mode 100644 tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 8f6652223cc..f7b3524d1b9 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -8,9 +8,6 @@ #include -static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; - - namespace DB { @@ -134,9 +131,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const threshold = applyVisitor(FieldVisitorConvertToNumber(), params[0]); - if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE)); + if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too large parameter(s) for aggregate function '{}' (maximum is {})", + name, + toString(DB::TOP_K_MAX_SIZE)); if (threshold == 0) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name); diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 89985c0ea6b..89c49b24530 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -20,6 +20,12 @@ namespace DB { struct Settings; +static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} template struct AggregateFunctionTopKData @@ -163,11 +169,18 @@ public: { auto & set = this->data(place).value; set.clear(); - set.resize(reserved); // Specialized here because there's no deserialiser for StringRef size_t size = 0; readVarUInt(size, buf); + if (unlikely(size > TOP_K_MAX_SIZE)) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too large size ({}) for aggregate function '{}' state (maximum is {})", + size, + getName(), + TOP_K_MAX_SIZE); + set.resize(size); for (size_t i = 0; i < size; ++i) { auto ref = readStringBinaryInto(*arena, buf); diff --git a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.reference b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql new file mode 100644 index 00000000000..2427edb5416 --- /dev/null +++ b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/49706 +-- Using format Parquet for convenience so it errors out without output (but still deserializes the output) +-- Without the fix this would OOM the client when deserializing the state +SELECT + topKResampleState(1048576, 257, 65536, 10)(toString(number), number) +FROM numbers(3) +FORMAT Parquet; -- { clientError UNKNOWN_TYPE } From ef071ac3f87f879fc506bac919b0f2b789394e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 23 Oct 2023 16:50:23 +0200 Subject: [PATCH 2/2] Fast tests don't build formats --- .../0_stateless/02902_topKGeneric_deserialization_memory.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql index 2427edb5416..3228810e0ba 100644 --- a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql +++ b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + -- https://github.com/ClickHouse/ClickHouse/issues/49706 -- Using format Parquet for convenience so it errors out without output (but still deserializes the output) -- Without the fix this would OOM the client when deserializing the state