From 2c40dd6a4c6aaa956762b8709fe5e4686e16876c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 2 May 2023 14:36:02 +0200
Subject: [PATCH 1/9] Switch Block::NameMap to google::dense_hash_map over
 HashMap

Since HashMap creates 2^8 elements by default, while dense_hash_map
should be good here.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Access/AuthenticationData.cpp                   |  1 +
 src/AggregateFunctions/AggregateFunctionForEach.h   |  1 +
 src/AggregateFunctions/AggregateFunctionMinMaxAny.h |  1 +
 src/CMakeLists.txt                                  |  3 ++-
 src/Core/Block.cpp                                  |  6 ++----
 src/Core/Block.h                                    |  6 ++----
 .../SerializationAggregateFunction.cpp              |  1 +
 src/Formats/ColumnMapping.cpp                       |  6 +++---
 src/Functions/FunctionBinaryArithmetic.h            |  1 +
 src/Interpreters/QueryLog.h                         |  1 +
 src/Interpreters/QueryViewsLog.h                    |  1 +
 src/Interpreters/tests/gtest_context_race.cpp       |  1 +
 .../Formats/Impl/BSONEachRowRowInputFormat.cpp      | 12 +++++++-----
 .../Formats/Impl/BSONEachRowRowInputFormat.h        |  2 +-
 .../Impl/JSONColumnsBlockInputFormatBase.cpp        |  2 +-
 .../Formats/Impl/JSONEachRowRowInputFormat.cpp      | 13 ++++++-------
 .../Formats/Impl/JSONEachRowRowInputFormat.h        |  5 ++---
 .../QueryPlan/DistributedCreateLocalPlan.h          |  1 +
 src/Storages/DataLakes/DeltaLakeMetadataParser.cpp  |  5 +++--
 src/Storages/DataLakes/HudiMetadataParser.cpp       |  1 +
 src/Storages/DataLakes/IcebergMetadataParser.cpp    |  1 +
 src/Storages/StorageS3.h                            |  1 +
 22 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp
index 409338209cc..3bb0be160f4 100644
--- a/src/Access/AuthenticationData.cpp
+++ b/src/Access/AuthenticationData.cpp
@@ -10,6 +10,7 @@
 #include <Common/OpenSSLHelpers.h>
 #include <Poco/SHA1Engine.h>
 #include <base/types.h>
+#include <base/hex.h>
 #include <boost/algorithm/hex.hpp>
 #include <boost/algorithm/string/case_conv.hpp>
 
diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h
index 81ba298bb8a..480b4cc690e 100644
--- a/src/AggregateFunctions/AggregateFunctionForEach.h
+++ b/src/AggregateFunctions/AggregateFunctionForEach.h
@@ -2,6 +2,7 @@
 
 #include <Columns/ColumnArray.h>
 #include <Common/assert_cast.h>
+#include <Common/Arena.h>
 #include <base/arithmeticOverflow.h>
 #include <DataTypes/DataTypeArray.h>
 #include <AggregateFunctions/IAggregateFunction.h>
diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
index c2cf2ac418f..94c0d60be81 100644
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@@ -10,6 +10,7 @@
 #include <DataTypes/IDataType.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <base/StringRef.h>
+#include <Common/Arena.h>
 #include <Common/assert_cast.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <AggregateFunctions/IAggregateFunction.h>
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b3f4fbb7420..c04b7acad3d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -211,6 +211,7 @@ endif()
 if (TARGET ch_contrib::jemalloc)
     target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
 endif()
+target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
 
 add_subdirectory(Access/Common)
 add_subdirectory(Common/ZooKeeper)
@@ -463,7 +464,7 @@ endif ()
 if (TARGET ch_contrib::ldap)
     dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
 endif ()
-dbms_target_link_libraries (PRIVATE ch_contrib::sparsehash)
+dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)
 
 if (TARGET ch_contrib::protobuf)
     dbms_target_link_libraries (PRIVATE ch_contrib::protobuf)
diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp
index 456f1d5d95e..a3bd29faab1 100644
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@@ -663,12 +663,10 @@ Names Block::getDataTypeNames() const
 
 Block::NameMap Block::getNamesToIndexesMap() const
 {
-    NameMap res;
-    res.reserve(index_by_name.size());
-
+    NameMap res(index_by_name.size());
+    res.set_empty_key(StringRef{});
     for (const auto & [name, index] : index_by_name)
         res[name] = index;
-
     return res;
 }
 
diff --git a/src/Core/Block.h b/src/Core/Block.h
index eb9d57ea6f8..7eed48d3d9f 100644
--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@@ -5,13 +5,11 @@
 #include <Core/ColumnsWithTypeAndName.h>
 #include <Core/NamesAndTypes.h>
 
-#include <Common/HashTable/HashMap.h>
-
 #include <initializer_list>
 #include <list>
-#include <map>
 #include <set>
 #include <vector>
+#include <sparsehash/dense_hash_map>
 
 
 namespace DB
@@ -97,7 +95,7 @@ public:
     Names getDataTypeNames() const;
 
     /// Hash table match `column name -> position in the block`.
-    using NameMap = HashMap<StringRef, size_t, StringRefHash>;
+    using NameMap = ::google::dense_hash_map<StringRef, size_t, StringRefHash>;
     NameMap getNamesToIndexesMap() const;
 
     Serializations getSerializations() const;
diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
index 7e192595114..c482c9623e9 100644
--- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
+++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
@@ -7,6 +7,7 @@
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
 #include <Common/AlignedBuffer.h>
+#include <Common/Arena.h>
 
 #include <Formats/FormatSettings.h>
 #include <Formats/ProtobufReader.h>
diff --git a/src/Formats/ColumnMapping.cpp b/src/Formats/ColumnMapping.cpp
index 2fab5766591..e33dfc878f4 100644
--- a/src/Formats/ColumnMapping.cpp
+++ b/src/Formats/ColumnMapping.cpp
@@ -26,8 +26,8 @@ void ColumnMapping::addColumns(
     {
         names_of_columns.push_back(name);
 
-        const auto * column_it = column_indexes_by_names.find(name);
-        if (!column_it)
+        const auto column_it = column_indexes_by_names.find(name);
+        if (column_it == column_indexes_by_names.end())
         {
             if (settings.skip_unknown_fields)
             {
@@ -43,7 +43,7 @@ void ColumnMapping::addColumns(
                             name, column_indexes_for_input_fields.size());
         }
 
-        const auto column_index = column_it->getMapped();
+        const auto column_index = column_it->second;
 
         if (read_columns[column_index])
             throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing format header: {}", name);
diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index a83bc6382b6..0174899d767 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -41,6 +41,7 @@
 #include <Common/FieldVisitorsAccurateComparison.h>
 #include <Common/assert_cast.h>
 #include <Common/typeid_cast.h>
+#include <Common/Arena.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <Interpreters/Context.h>
 
diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h
index a2d434e8843..44780f530e0 100644
--- a/src/Interpreters/QueryLog.h
+++ b/src/Interpreters/QueryLog.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/ProfileEvents.h>
 #include <Core/NamesAndTypes.h>
 #include <Core/NamesAndAliases.h>
 #include <Core/Settings.h>
diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h
index 986311fc822..e28bce0b91c 100644
--- a/src/Interpreters/QueryViewsLog.h
+++ b/src/Interpreters/QueryViewsLog.h
@@ -13,6 +13,7 @@
 #include <Core/NamesAndAliases.h>
 #include <Interpreters/SystemLog.h>
 #include <base/types.h>
+#include <Common/ProfileEvents.h>
 
 namespace ProfileEvents
 {
diff --git a/src/Interpreters/tests/gtest_context_race.cpp b/src/Interpreters/tests/gtest_context_race.cpp
index 60531494592..ec61fc9467c 100644
--- a/src/Interpreters/tests/gtest_context_race.cpp
+++ b/src/Interpreters/tests/gtest_context_race.cpp
@@ -1,6 +1,7 @@
 #include <Interpreters/Context.h>
 #include <Common/tests/gtest_global_context.h>
 #include <gtest/gtest.h>
+#include <thread>
 
 using namespace DB;
 
diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
index 370cddd2dcb..57598fb507f 100644
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
@@ -64,20 +64,22 @@ inline size_t BSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
     /// Optimization by caching the order of fields (which is almost always the same)
     /// and a quick check to match the next expected field, instead of searching the hash table.
 
-    if (prev_positions.size() > key_index && prev_positions[key_index] && name == prev_positions[key_index]->getKey())
+    if (prev_positions.size() > key_index
+        && prev_positions[key_index] != Block::NameMap::const_iterator{}
+        && name == prev_positions[key_index]->first)
     {
-        return prev_positions[key_index]->getMapped();
+        return prev_positions[key_index]->second;
     }
     else
     {
-        auto * it = name_map.find(name);
+        const auto it = name_map.find(name);
 
-        if (it)
+        if (it != name_map.end())
         {
             if (key_index < prev_positions.size())
                 prev_positions[key_index] = it;
 
-            return it->getMapped();
+            return it->second;
         }
         else
             return UNKNOWN_FIELD;
diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h
index ad6d712b6dd..538a59e05c3 100644
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h
@@ -91,7 +91,7 @@ private:
     Block::NameMap name_map;
 
     /// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
-    std::vector<Block::NameMap::LookupResult> prev_positions;
+    std::vector<Block::NameMap::const_iterator> prev_positions;
 
     DataTypes types;
 
diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
index 3d052e23c21..2e264c59f56 100644
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
@@ -128,7 +128,7 @@ Chunk JSONColumnsBlockInputFormatBase::generate()
         {
             /// Check if this name appears in header. If no, skip this column or throw
             /// an exception according to setting input_format_skip_unknown_fields
-            if (!name_to_index.has(*column_name))
+            if (name_to_index.find(*column_name) == name_to_index.end())
             {
                 if (!format_settings.skip_unknown_fields)
                     throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column found in input data: {}", *column_name);
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
index 22ac31c7824..e5f52936021 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@@ -71,21 +71,20 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(StringRef name, size_t key_
     /// and a quick check to match the next expected field, instead of searching the hash table.
 
     if (prev_positions.size() > key_index
-        && prev_positions[key_index]
-        && name == prev_positions[key_index]->getKey())
+        && prev_positions[key_index] != Block::NameMap::const_iterator{}
+        && name == prev_positions[key_index]->first)
     {
-        return prev_positions[key_index]->getMapped();
+        return prev_positions[key_index]->second;
     }
     else
     {
-        auto * it = name_map.find(name);
-
-        if (it)
+        const auto it = name_map.find(name);
+        if (it != name_map.end())
         {
             if (key_index < prev_positions.size())
                 prev_positions[key_index] = it;
 
-            return it->getMapped();
+            return it->second;
         }
         else
             return UNKNOWN_FIELD;
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
index beee9e95821..ce42071585e 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@@ -71,11 +71,10 @@ private:
     /// for row like {..., "non-nullable column name" : null, ...}
 
     /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
-    using NameMap = HashMap<StringRef, size_t, StringRefHash>;
-    NameMap name_map;
+    Block::NameMap name_map;
 
     /// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
-    std::vector<NameMap::LookupResult> prev_positions;
+    std::vector<Block::NameMap::const_iterator> prev_positions;
 
     bool allow_new_rows = true;
 
diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h
index 16bf1c565ff..1afdc07fa4d 100644
--- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h
+++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Core/QueryProcessingStage.h>
+#include <Core/UUID.h>
 #include <Parsers/IAST_fwd.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/ResizeProcessor.h>
diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
index 45189ca325a..309aa54909a 100644
--- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
+++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
@@ -11,10 +11,11 @@
 #include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
 #include <Processors/Formats/Impl/ArrowColumnToCHColumn.h>
 #include <Formats/FormatFactory.h>
-#include <boost/algorithm/string/case_conv.hpp>
-#include <parquet/arrow/reader.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnNullable.h>
+#include <IO/ReadHelpers.h>
+#include <boost/algorithm/string/case_conv.hpp>
+#include <parquet/arrow/reader.h>
 #include <ranges>
 
 namespace fs = std::filesystem;
diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp
index 74e556f2bf2..a1f35a5ae42 100644
--- a/src/Storages/DataLakes/HudiMetadataParser.cpp
+++ b/src/Storages/DataLakes/HudiMetadataParser.cpp
@@ -1,6 +1,7 @@
 #include <Storages/DataLakes/HudiMetadataParser.h>
 #include <Common/logger_useful.h>
 #include <ranges>
+#include <base/find_symbols.h>
 #include <Poco/String.h>
 #include "config.h"
 #include <filesystem>
diff --git a/src/Storages/DataLakes/IcebergMetadataParser.cpp b/src/Storages/DataLakes/IcebergMetadataParser.cpp
index 4e90da41721..3ab90e271cf 100644
--- a/src/Storages/DataLakes/IcebergMetadataParser.cpp
+++ b/src/Storages/DataLakes/IcebergMetadataParser.cpp
@@ -12,6 +12,7 @@
 #include <Storages/StorageS3.h>
 #include <Processors/Formats/Impl/AvroRowInputFormat.h>
 #include <Formats/FormatFactory.h>
+#include <IO/ReadHelpers.h>
 
 #include <Poco/JSON/Array.h>
 #include <Poco/JSON/Object.h>
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index c3b862f6bbd..12573ab513f 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -16,6 +16,7 @@
 #include <Poco/URI.h>
 #include <IO/S3/getObjectInfo.h>
 #include <IO/CompressionMethod.h>
+#include <IO/SeekableReadBuffer.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/threadPoolCallbackRunner.h>
 #include <Storages/Cache/SchemaCache.h>

From fcce60ad25243f128d97a264cdec4c025e67a1b3 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 12 May 2023 15:09:34 +0200
Subject: [PATCH 2/9] Randomize enable_multiple_prewhere_read_steps

---
 tests/clickhouse-test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index e279b899a93..09bc15fbe3d 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -530,6 +530,7 @@ class SettingsRandomizer:
         "max_threads": lambda: random.randint(1, 64),
         "optimize_or_like_chain": lambda: random.randint(0, 1),
         "optimize_read_in_order": lambda: random.randint(0, 1),
+        "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
         "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
         "optimize_aggregation_in_order": lambda: random.randint(0, 1),
         "aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000),

From 1e3b7af97a5845c0dd02299a40e65bad5e6468fb Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Fri, 12 May 2023 10:26:05 -0300
Subject: [PATCH 3/9] Add setting to limit the max number of pairs produced by
 extractKeyValuePairs

---
 src/Common/ErrorCodes.cpp                     |  1 +
 src/Core/Settings.h                           |  1 +
 .../keyvaluepair/extractKeyValuePairs.cpp     | 13 ++++++--
 .../impl/CHKeyValuePairExtractor.h            | 15 +++++++---
 .../impl/KeyValuePairExtractorBuilder.cpp     | 14 ++++++---
 .../impl/KeyValuePairExtractorBuilder.h       |  3 ++
 ...t_key_value_pairs_multiple_input.reference | 16 ++++++++++
 ...extract_key_value_pairs_multiple_input.sql | 30 +++++++++++++++++++
 8 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index e9dc5649245..382b8ed8019 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -576,6 +576,7 @@
     M(691, UNKNOWN_ELEMENT_OF_ENUM) \
     M(692, TOO_MANY_MUTATIONS) \
     M(693, AWS_ERROR) \
+    M(694, TOO_LARGE_MAP_SIZE) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index f60632fae91..3d1640e21b1 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -748,6 +748,7 @@ class IColumn;
     M(Bool, optimize_distinct_in_order, false, "This optimization has a bug and it is disabled. Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
     M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
     M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
+    M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
     // End of COMMON_SETTINGS
     // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
 
diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
index a1b140001ac..00588052870 100644
--- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
+++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
@@ -7,6 +7,8 @@
 #include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeString.h>
 
+#include <Interpreters/Context.h>
+
 #include <Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
 #include <Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h>
 #include <Functions/keyvaluepair/ArgumentExtractor.h>
@@ -41,6 +43,8 @@ class ExtractKeyValuePairs : public IFunction
             builder.withQuotingCharacter(parsed_arguments.quoting_character.value());
         }
 
+        builder.withMaxNumberOfPairs(context->getSettingsRef().extract_kvp_max_pairs_per_row);
+
         return builder.build();
     }
 
@@ -73,7 +77,7 @@ class ExtractKeyValuePairs : public IFunction
     }
 
 public:
-    ExtractKeyValuePairs() = default;
+    explicit ExtractKeyValuePairs(ContextPtr context_) : context(context_) {}
 
     static constexpr auto name = Name::name;
 
@@ -82,9 +86,9 @@ public:
         return name;
     }
 
-    static FunctionPtr create(ContextPtr)
+    static FunctionPtr create(ContextPtr context)
     {
-        return std::make_shared<ExtractKeyValuePairs>();
+        return std::make_shared<ExtractKeyValuePairs>(context);
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
@@ -120,6 +124,9 @@ public:
     {
         return {1, 2, 3, 4};
     }
+
+private:
+    ContextPtr context;
 };
 
 struct NameExtractKeyValuePairs
diff --git a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
index 139417579de..24c8d3cd89a 100644
--- a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
+++ b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
@@ -13,6 +13,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int TOO_LARGE_MAP_SIZE;
 }
 
 /*
@@ -25,8 +26,8 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
     using NextState = DB::extractKV::StateHandler::NextState;
 
 public:
-    explicit CHKeyValuePairExtractor(StateHandler state_handler_)
-        : state_handler(std::move(state_handler_))
+    explicit CHKeyValuePairExtractor(StateHandler state_handler_, uint64_t max_number_of_pairs_)
+        : state_handler(std::move(state_handler_)), max_number_of_pairs(max_number_of_pairs_)
     {}
 
     uint64_t extract(const std::string & data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
@@ -113,11 +114,16 @@ private:
     NextState flushPair(const std::string_view & file, auto & key,
                         auto & value, uint64_t & row_offset)
     {
+        row_offset++;
+
+        if (row_offset > max_number_of_pairs)
+        {
+            throw Exception(ErrorCodes::TOO_LARGE_MAP_SIZE, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
+        }
+
         key.commit();
         value.commit();
 
-        row_offset++;
-
         return {0, file.empty() ? State::END : State::WAITING_KEY};
     }
 
@@ -128,6 +134,7 @@ private:
     }
 
     StateHandler state_handler;
+    uint64_t max_number_of_pairs;
 };
 
 }
diff --git a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
index 188285dc4bd..7f2a6449ab0 100644
--- a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
+++ b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
@@ -31,6 +31,12 @@ KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withEscaping()
     return *this;
 }
 
+KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withMaxNumberOfPairs(uint64_t max_number_of_pairs_)
+{
+    max_number_of_pairs = max_number_of_pairs_;
+    return *this;
+}
+
 std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::build() const
 {
     if (with_escaping)
@@ -46,9 +52,9 @@ namespace
 using namespace extractKV;
 
 template <typename T>
-auto makeStateHandler(const T && handler)
+auto makeStateHandler(const T && handler, uint64_t max_number_of_pairs)
 {
-    return std::make_shared<CHKeyValuePairExtractor<T>>(handler);
+    return std::make_shared<CHKeyValuePairExtractor<T>>(handler, max_number_of_pairs);
 }
 
 }
@@ -57,14 +63,14 @@ std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithou
 {
     auto configuration = ConfigurationFactory::createWithoutEscaping(key_value_delimiter, quoting_character, item_delimiters);
 
-    return makeStateHandler(NoEscapingStateHandler(configuration));
+    return makeStateHandler(NoEscapingStateHandler(configuration), max_number_of_pairs);
 }
 
 std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithEscaping() const
 {
     auto configuration = ConfigurationFactory::createWithEscaping(key_value_delimiter, quoting_character, item_delimiters);
 
-    return makeStateHandler(InlineEscapingStateHandler(configuration));
+    return makeStateHandler(InlineEscapingStateHandler(configuration), max_number_of_pairs);
 }
 
 }
diff --git a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h
index 5746c58dccc..0c673f12ccf 100644
--- a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h
+++ b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h
@@ -20,6 +20,8 @@ public:
 
     KeyValuePairExtractorBuilder & withEscaping();
 
+    KeyValuePairExtractorBuilder & withMaxNumberOfPairs(uint64_t max_number_of_pairs_);
+
     std::shared_ptr<KeyValuePairExtractor> build() const;
 
 private:
@@ -27,6 +29,7 @@ private:
     char key_value_delimiter = ':';
     char quoting_character = '"';
     std::vector<char> item_delimiters = {' ', ',', ';'};
+    uint64_t max_number_of_pairs = std::numeric_limits<uint64_t>::max();
 
     std::shared_ptr<KeyValuePairExtractor> buildWithEscaping() const;
 
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
index 125afa19427..ec51b61a382 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
@@ -292,6 +292,22 @@ SELECT
     x;
 {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'}
 -- { echoOn }
+
+SET extract_kvp_max_pairs_per_row = 2;
+-- Should be allowed because it no longer exceeds the max number of pairs
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+{'key1':'value1','key2':'value2'}
+SET extract_kvp_max_pairs_per_row = 99999;
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH
     extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map,
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
index 0a1500b1796..7dfcae879b0 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
@@ -414,7 +414,37 @@ WITH
 SELECT
     x; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
 
+-- Should fail allowed because it exceeds the max number of pairs
+SET extract_kvp_max_pairs_per_row = 1;
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x; -- {serverError TOO_LARGE_MAP_SIZE}
+
 -- { echoOn }
+
+SET extract_kvp_max_pairs_per_row = 2;
+-- Should be allowed because it no longer exceeds the max number of pairs
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+
+SET extract_kvp_max_pairs_per_row = 99999;
+
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH
     extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map,

From b1549a19a50048a93f5cc0b48c482160fcb66613 Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Fri, 12 May 2023 11:19:35 -0300
Subject: [PATCH 4/9] Use 0 as unlimited

---
 .../keyvaluepair/extractKeyValuePairs.cpp         |  7 ++++++-
 ...tract_key_value_pairs_multiple_input.reference | 15 ++++++++++++++-
 ...499_extract_key_value_pairs_multiple_input.sql | 14 +++++++++++++-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
index 00588052870..9956eaf01f1 100644
--- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
+++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
@@ -43,7 +43,12 @@ class ExtractKeyValuePairs : public IFunction
             builder.withQuotingCharacter(parsed_arguments.quoting_character.value());
         }
 
-        builder.withMaxNumberOfPairs(context->getSettingsRef().extract_kvp_max_pairs_per_row);
+        bool is_number_of_pairs_unlimited = context->getSettingsRef().extract_kvp_max_pairs_per_row == 0;
+
+        if (!is_number_of_pairs_unlimited)
+        {
+            builder.withMaxNumberOfPairs(context->getSettingsRef().extract_kvp_max_pairs_per_row);
+        }
 
         return builder.build();
     }
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
index ec51b61a382..a4ff1464fb8 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
@@ -307,7 +307,20 @@ WITH
 SELECT
     x;
 {'key1':'value1','key2':'value2'}
-SET extract_kvp_max_pairs_per_row = 99999;
+SET extract_kvp_max_pairs_per_row = 0;
+-- Should be allowed because max pairs per row is set to 0 (unlimited)
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+{'key1':'value1','key2':'value2'}
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH
     extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map,
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
index 7dfcae879b0..8c5f0365cc7 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
@@ -443,7 +443,19 @@ WITH
 SELECT
     x;
 
-SET extract_kvp_max_pairs_per_row = 99999;
+SET extract_kvp_max_pairs_per_row = 0;
+-- Should be allowed because max pairs per row is set to 0 (unlimited)
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
 
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH

From 2b793e3a14a850197ffbb23b76389b54f565586c Mon Sep 17 00:00:00 2001
From: darkkeks <darkkeks@rambler.ru>
Date: Sat, 13 May 2023 16:22:17 +0300
Subject: [PATCH 5/9] [docs] Remove "example" section from date-time-functions
 page toc

---
 .../functions/date-time-functions.md          | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 599b64ef43f..4710a5617c3 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1218,12 +1218,16 @@ Rounds the time to the half hour.
 
 Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
 
-### example
-```sql
+**Example**
+
+``` sql
 SELECT
     toYYYYMM(now(), 'US/Eastern')
 ```
-```response
+
+Result:
+
+``` text
 ┌─toYYYYMM(now(), 'US/Eastern')─┐
 │                        202303 │
 └───────────────────────────────┘
@@ -1233,11 +1237,15 @@ SELECT
 
 Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
 
-### example
+**Example**
+
 ```sql
 SELECT
     toYYYYMMDD(now(), 'US/Eastern')
 ```
+
+Result:
+
 ```response
 ┌─toYYYYMMDD(now(), 'US/Eastern')─┐
 │                        20230302 │
@@ -1248,11 +1256,15 @@ SELECT
 
 Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
 
-### example
+**Example**
+
 ```sql
 SELECT
     toYYYYMMDDhhmmss(now(), 'US/Eastern')
 ```
+
+Result:
+
 ```response
 ┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
 │                        20230302112209 │

From 1db35384d969f9239de84563927494148100c6d5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 15 May 2023 03:30:03 +0200
Subject: [PATCH 6/9] Support `bitCount` for big integers

---
 src/Functions/bitCount.cpp                    |  9 ++++++++-
 .../02736_bit_count_big_int.reference         | 13 +++++++++++++
 .../0_stateless/02736_bit_count_big_int.sql   | 19 +++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02736_bit_count_big_int.reference
 create mode 100644 tests/queries/0_stateless/02736_bit_count_big_int.sql

diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp
index 984f33b7001..566a11481be 100644
--- a/src/Functions/bitCount.cpp
+++ b/src/Functions/bitCount.cpp
@@ -9,7 +9,7 @@ namespace DB
 template <typename A>
 struct BitCountImpl
 {
-    using ResultType = UInt8;
+    using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>;
     static constexpr bool allow_string_or_fixed_string = true;
 
     static inline ResultType apply(A a)
@@ -17,6 +17,13 @@ struct BitCountImpl
         /// We count bits in the value representation in memory. For example, we support floats.
         /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits.
 
+        if constexpr (is_big_int_v<A>)
+        {
+            ResultType res = 0;
+            for (auto item : a.items)
+                res += __builtin_popcountll(item);
+            return res;
+        }
         if constexpr (std::is_same_v<A, UInt64> || std::is_same_v<A, Int64>)
             return __builtin_popcountll(a);
         if constexpr (std::is_same_v<A, UInt32> || std::is_same_v<A, Int32> || std::is_unsigned_v<A>)
diff --git a/tests/queries/0_stateless/02736_bit_count_big_int.reference b/tests/queries/0_stateless/02736_bit_count_big_int.reference
new file mode 100644
index 00000000000..a3a725ace69
--- /dev/null
+++ b/tests/queries/0_stateless/02736_bit_count_big_int.reference
@@ -0,0 +1,13 @@
+128
+256
+128
+256
+127
+255
+126
+255
+64
+UInt8
+UInt16
+UInt8
+UInt16
diff --git a/tests/queries/0_stateless/02736_bit_count_big_int.sql b/tests/queries/0_stateless/02736_bit_count_big_int.sql
new file mode 100644
index 00000000000..35a4a641606
--- /dev/null
+++ b/tests/queries/0_stateless/02736_bit_count_big_int.sql
@@ -0,0 +1,19 @@
+SELECT bitCount(CAST(-1 AS UInt128));
+SELECT bitCount(CAST(-1 AS UInt256));
+
+SELECT bitCount(CAST(-1 AS Int128));
+SELECT bitCount(CAST(-1 AS Int256));
+
+SELECT bitCount(CAST(-1 AS UInt128) - 1);
+SELECT bitCount(CAST(-1 AS UInt256) - 2);
+
+SELECT bitCount(CAST(-1 AS Int128) - 3);
+SELECT bitCount(CAST(-1 AS Int256) - 4);
+
+SELECT bitCount(CAST(0xFFFFFFFFFFFFFFFF AS Int256));
+
+SELECT toTypeName(bitCount(1::UInt128));
+SELECT toTypeName(bitCount(1::UInt256));
+
+SELECT toTypeName(bitCount(1::Int128));
+SELECT toTypeName(bitCount(1::Int256));

From 4764259f609a024f04ec8c082b63a495efc66d70 Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Mon, 15 May 2023 14:17:16 +0800
Subject: [PATCH 7/9] Fix a bug with projections and the
 aggregate_functions_null_for_empty setting (for
 query_plan_optimize_projection)

Fix a bug with projections and the aggregate_functions_null_for_empty
setting. This was already fixed in PR #42198 but got forgotten after
using query_plan_optimize_projection.
---
 .../QueryPlan/Optimizations/projectionsCommon.cpp         | 4 ++++
 ...rojection_aggregate_functions_null_for_empty.reference | 1 +
 ...1710_projection_aggregate_functions_null_for_empty.sql | 8 ++++++++
 3 files changed, 13 insertions(+)
 create mode 100644 tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference
 create mode 100644 tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql

diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
index a334450fb41..9252137f649 100644
--- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
+++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
@@ -42,6 +42,10 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
     if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication)
         return false;
 
+    // Currently projection don't support settings which implicitly modify aggregate functions.
+    if (reading->getContext()->getSettingsRef().aggregate_functions_null_for_empty)
+        return false;
+
     return true;
 }
 
diff --git a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference
new file mode 100644
index 00000000000..f2a527c4d8d
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference
@@ -0,0 +1 @@
+1554690688
diff --git a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql
new file mode 100644
index 00000000000..a77720b6580
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t1;
+
+CREATE TABLE t1 (c0 Int32, PRIMARY KEY (c0)) ENGINE=MergeTree;
+INSERT INTO t1 VALUES (1554690688);
+
+SELECT MIN(t1.c0) FROM t1 SETTINGS aggregate_functions_null_for_empty = 1;
+
+DROP TABLE IF EXISTS t1;

From 8d1bcb5c2f23ea0e29c61c8cb3f584586f5f8834 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Mon, 15 May 2023 16:28:28 +0800
Subject: [PATCH 8/9] fix typo

---
 src/Storages/StorageMongoDB.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h
index 682a027440d..2b77f076e7e 100644
--- a/src/Storages/StorageMongoDB.h
+++ b/src/Storages/StorageMongoDB.h
@@ -7,7 +7,7 @@
 namespace DB
 {
 /* Implements storage in the MongoDB database.
- * Use ENGINE = mysql(host_port, database_name, table_name, user_name, password)
+ * Use ENGINE = MongoDB(host:port, database, collection, user, password [, options]);
  * Read only.
  */
 

From e8f971aa2bdfcdfb12e0091a8aa6a7eb4e96ff8c Mon Sep 17 00:00:00 2001
From: Arthur Passos <arthur.ti@outlook.com>
Date: Mon, 15 May 2023 09:25:10 -0300
Subject: [PATCH 9/9] use LIMIT_EXCEEDED instead of TOO_LARGE_MAP_SIZE

---
 src/Common/ErrorCodes.cpp                                     | 1 -
 src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h     | 4 ++--
 .../02499_extract_key_value_pairs_multiple_input.sql          | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 382b8ed8019..e9dc5649245 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -576,7 +576,6 @@
     M(691, UNKNOWN_ELEMENT_OF_ENUM) \
     M(692, TOO_MANY_MUTATIONS) \
     M(693, AWS_ERROR) \
-    M(694, TOO_LARGE_MAP_SIZE) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
index 24c8d3cd89a..3895cf3e77d 100644
--- a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
+++ b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
@@ -13,7 +13,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
-    extern const int TOO_LARGE_MAP_SIZE;
+    extern const int LIMIT_EXCEEDED;
 }
 
 /*
@@ -118,7 +118,7 @@ private:
 
         if (row_offset > max_number_of_pairs)
         {
-            throw Exception(ErrorCodes::TOO_LARGE_MAP_SIZE, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
+            throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
         }
 
         key.commit();
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
index 8c5f0365cc7..38d09338774 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
@@ -425,7 +425,7 @@ WITH
             'Map(String,String)'
         ) AS x
 SELECT
-    x; -- {serverError TOO_LARGE_MAP_SIZE}
+    x; -- {serverError LIMIT_EXCEEDED}
 
 -- { echoOn }