Merge remote-tracking branch 'origin/master' into fill_with_by_sorting_prefix_2

2024-12-03 21:12:28 +00:00 · 2023-05-15 14:40:46 +00:00 · 2023-05-15 14:40:46 +00:00 · 4d168400ae
commit 4d168400ae
parent b2dddf7cf8 07de815d96
38 changed files with 224 additions and 49 deletions
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -1218,12 +1218,16 @@ Rounds the time to the half hour.

 Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

-### example
-```sql
+**Example**
+
+``` sql
 SELECT
    toYYYYMM(now(), 'US/Eastern')
 ```
-```response
+
+Result:
+
+``` text
 ┌─toYYYYMM(now(), 'US/Eastern')─┐
 │                        202303 │
 └───────────────────────────────┘
@ -1233,11 +1237,15 @@ SELECT

 Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

-### example
+**Example**
+
 ```sql
 SELECT
    toYYYYMMDD(now(), 'US/Eastern')
 ```
+
+Result:
+
 ```response
 ┌─toYYYYMMDD(now(), 'US/Eastern')─┐
 │                        20230302 │
@ -1248,11 +1256,15 @@ SELECT

 Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.

-### example
+**Example**
+
 ```sql
 SELECT
    toYYYYMMDDhhmmss(now(), 'US/Eastern')
 ```
+
+Result:
+
 ```response
 ┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
 │                        20230302112209 │
--- a/src/Access/AuthenticationData.cpp
+++ b/src/Access/AuthenticationData.cpp
@ -10,6 +10,7 @@
 #include <Common/OpenSSLHelpers.h>
 #include <Poco/SHA1Engine.h>
 #include <base/types.h>
+#include <base/hex.h>
 #include <boost/algorithm/hex.hpp>
 #include <boost/algorithm/string/case_conv.hpp>

--- a/src/AggregateFunctions/AggregateFunctionForEach.h
+++ b/src/AggregateFunctions/AggregateFunctionForEach.h
@ -2,6 +2,7 @@

 #include <Columns/ColumnArray.h>
 #include <Common/assert_cast.h>
+#include <Common/Arena.h>
 #include <base/arithmeticOverflow.h>
 #include <DataTypes/DataTypeArray.h>
 #include <AggregateFunctions/IAggregateFunction.h>
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@ -10,6 +10,7 @@
 #include <DataTypes/IDataType.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <base/StringRef.h>
+#include <Common/Arena.h>
 #include <Common/assert_cast.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <AggregateFunctions/IAggregateFunction.h>
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -211,6 +211,7 @@ endif()
 if (TARGET ch_contrib::jemalloc)
    target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
 endif()
+target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)

 add_subdirectory(Access/Common)
 add_subdirectory(Common/ZooKeeper)
@ -463,7 +464,7 @@ endif ()
 if (TARGET ch_contrib::ldap)
    dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
 endif ()
-dbms_target_link_libraries (PRIVATE ch_contrib::sparsehash)
+dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)

 if (TARGET ch_contrib::protobuf)
    dbms_target_link_libraries (PRIVATE ch_contrib::protobuf)
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@ -663,12 +663,10 @@ Names Block::getDataTypeNames() const

 Block::NameMap Block::getNamesToIndexesMap() const
 {
-    NameMap res;
-    res.reserve(index_by_name.size());
-
+    NameMap res(index_by_name.size());
+    res.set_empty_key(StringRef{});
    for (const auto & [name, index] : index_by_name)
        res[name] = index;
-
    return res;
 }

--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@ -5,13 +5,11 @@
 #include <Core/ColumnsWithTypeAndName.h>
 #include <Core/NamesAndTypes.h>

-#include <Common/HashTable/HashMap.h>
-
 #include <initializer_list>
 #include <list>
-#include <map>
 #include <set>
 #include <vector>
+#include <sparsehash/dense_hash_map>


 namespace DB
@ -97,7 +95,7 @@ public:
    Names getDataTypeNames() const;

    /// Hash table match `column name -> position in the block`.
-    using NameMap = HashMap<StringRef, size_t, StringRefHash>;
+    using NameMap = ::google::dense_hash_map<StringRef, size_t, StringRefHash>;
    NameMap getNamesToIndexesMap() const;

    Serializations getSerializations() const;
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -749,6 +749,7 @@ class IColumn;
    M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
    M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
    M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
+    M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
    // End of COMMON_SETTINGS
    // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

--- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
+++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
@ -7,6 +7,7 @@
 #include <Common/typeid_cast.h>
 #include <Common/assert_cast.h>
 #include <Common/AlignedBuffer.h>
+#include <Common/Arena.h>

 #include <Formats/FormatSettings.h>
 #include <Formats/ProtobufReader.h>
--- a/src/Formats/ColumnMapping.cpp
+++ b/src/Formats/ColumnMapping.cpp
@ -26,8 +26,8 @@ void ColumnMapping::addColumns(
    {
        names_of_columns.push_back(name);

-        const auto * column_it = column_indexes_by_names.find(name);
-        if (!column_it)
+        const auto column_it = column_indexes_by_names.find(name);
+        if (column_it == column_indexes_by_names.end())
        {
            if (settings.skip_unknown_fields)
            {
@ -43,7 +43,7 @@ void ColumnMapping::addColumns(
                            name, column_indexes_for_input_fields.size());
        }

-        const auto column_index = column_it->getMapped();
+        const auto column_index = column_it->second;

        if (read_columns[column_index])
            throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing format header: {}", name);
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@ -41,6 +41,7 @@
 #include <Common/FieldVisitorsAccurateComparison.h>
 #include <Common/assert_cast.h>
 #include <Common/typeid_cast.h>
+#include <Common/Arena.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <Interpreters/Context.h>

--- a/src/Functions/bitCount.cpp
+++ b/src/Functions/bitCount.cpp
@ -9,7 +9,7 @@ namespace DB
 template <typename A>
 struct BitCountImpl
 {
-    using ResultType = UInt8;
+    using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>;
    static constexpr bool allow_string_or_fixed_string = true;

    static inline ResultType apply(A a)
@ -17,6 +17,13 @@ struct BitCountImpl
        /// We count bits in the value representation in memory. For example, we support floats.
        /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits.

+        if constexpr (is_big_int_v<A>)
+        {
+            ResultType res = 0;
+            for (auto item : a.items)
+                res += __builtin_popcountll(item);
+            return res;
+        }
        if constexpr (std::is_same_v<A, UInt64> || std::is_same_v<A, Int64>)
            return __builtin_popcountll(a);
        if constexpr (std::is_same_v<A, UInt32> || std::is_same_v<A, Int32> || std::is_unsigned_v<A>)
--- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
+++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp
@ -7,6 +7,8 @@
 #include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeString.h>

+#include <Interpreters/Context.h>
+
 #include <Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
 #include <Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h>
 #include <Functions/keyvaluepair/ArgumentExtractor.h>
@ -41,6 +43,13 @@ class ExtractKeyValuePairs : public IFunction
            builder.withQuotingCharacter(parsed_arguments.quoting_character.value());
        }

+        bool is_number_of_pairs_unlimited = context->getSettingsRef().extract_kvp_max_pairs_per_row == 0;
+
+        if (!is_number_of_pairs_unlimited)
+        {
+            builder.withMaxNumberOfPairs(context->getSettingsRef().extract_kvp_max_pairs_per_row);
+        }
+
        return builder.build();
    }

@ -73,7 +82,7 @@ class ExtractKeyValuePairs : public IFunction
    }

 public:
-    ExtractKeyValuePairs() = default;
+    explicit ExtractKeyValuePairs(ContextPtr context_) : context(context_) {}

    static constexpr auto name = Name::name;

@ -82,9 +91,9 @@ public:
        return name;
    }

-    static FunctionPtr create(ContextPtr)
+    static FunctionPtr create(ContextPtr context)
    {
-        return std::make_shared<ExtractKeyValuePairs>();
+        return std::make_shared<ExtractKeyValuePairs>(context);
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
@ -120,6 +129,9 @@ public:
    {
        return {1, 2, 3, 4};
    }
+
+private:
+    ContextPtr context;
 };

 struct NameExtractKeyValuePairs
--- a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
+++ b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
@ -13,6 +13,7 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int LOGICAL_ERROR;
+    extern const int LIMIT_EXCEEDED;
 }

 /*
@ -25,8 +26,8 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
    using NextState = DB::extractKV::StateHandler::NextState;

 public:
-    explicit CHKeyValuePairExtractor(StateHandler state_handler_)
-        : state_handler(std::move(state_handler_))
+    explicit CHKeyValuePairExtractor(StateHandler state_handler_, uint64_t max_number_of_pairs_)
+        : state_handler(std::move(state_handler_)), max_number_of_pairs(max_number_of_pairs_)
    {}

    uint64_t extract(const std::string & data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
@ -113,11 +114,16 @@ private:
    NextState flushPair(const std::string_view & file, auto & key,
                        auto & value, uint64_t & row_offset)
    {
+        row_offset++;
+
+        if (row_offset > max_number_of_pairs)
+        {
+            throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
+        }
+
        key.commit();
        value.commit();

-        row_offset++;
-
        return {0, file.empty() ? State::END : State::WAITING_KEY};
    }

@ -128,6 +134,7 @@ private:
    }

    StateHandler state_handler;
+    uint64_t max_number_of_pairs;
 };

 }
--- a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
+++ b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.cpp
@ -31,6 +31,12 @@ KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withEscaping()
    return *this;
 }

+KeyValuePairExtractorBuilder & KeyValuePairExtractorBuilder::withMaxNumberOfPairs(uint64_t max_number_of_pairs_)
+{
+    max_number_of_pairs = max_number_of_pairs_;
+    return *this;
+}
+
 std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::build() const
 {
    if (with_escaping)
@ -46,9 +52,9 @@ namespace
 using namespace extractKV;

 template <typename T>
-auto makeStateHandler(const T && handler)
+auto makeStateHandler(const T && handler, uint64_t max_number_of_pairs)
 {
-    return std::make_shared<CHKeyValuePairExtractor<T>>(handler);
+    return std::make_shared<CHKeyValuePairExtractor<T>>(handler, max_number_of_pairs);
 }

 }
@ -57,14 +63,14 @@ std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithou
 {
    auto configuration = ConfigurationFactory::createWithoutEscaping(key_value_delimiter, quoting_character, item_delimiters);

-    return makeStateHandler(NoEscapingStateHandler(configuration));
+    return makeStateHandler(NoEscapingStateHandler(configuration), max_number_of_pairs);
 }

 std::shared_ptr<KeyValuePairExtractor> KeyValuePairExtractorBuilder::buildWithEscaping() const
 {
    auto configuration = ConfigurationFactory::createWithEscaping(key_value_delimiter, quoting_character, item_delimiters);

-    return makeStateHandler(InlineEscapingStateHandler(configuration));
+    return makeStateHandler(InlineEscapingStateHandler(configuration), max_number_of_pairs);
 }

 }
--- a/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h
+++ b/src/Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h
@ -20,6 +20,8 @@ public:

    KeyValuePairExtractorBuilder & withEscaping();

+    KeyValuePairExtractorBuilder & withMaxNumberOfPairs(uint64_t max_number_of_pairs_);
+
    std::shared_ptr<KeyValuePairExtractor> build() const;

 private:
@ -27,6 +29,7 @@ private:
    char key_value_delimiter = ':';
    char quoting_character = '"';
    std::vector<char> item_delimiters = {' ', ',', ';'};
+    uint64_t max_number_of_pairs = std::numeric_limits<uint64_t>::max();

    std::shared_ptr<KeyValuePairExtractor> buildWithEscaping() const;

--- a/src/Interpreters/QueryLog.h
+++ b/src/Interpreters/QueryLog.h
@ -1,5 +1,6 @@
 #pragma once

+#include <Common/ProfileEvents.h>
 #include <Core/NamesAndTypes.h>
 #include <Core/NamesAndAliases.h>
 #include <Core/Settings.h>
--- a/src/Interpreters/QueryViewsLog.h
+++ b/src/Interpreters/QueryViewsLog.h
@ -13,6 +13,7 @@
 #include <Core/NamesAndAliases.h>
 #include <Interpreters/SystemLog.h>
 #include <base/types.h>
+#include <Common/ProfileEvents.h>

 namespace ProfileEvents
 {
--- a/src/Interpreters/tests/gtest_context_race.cpp
+++ b/src/Interpreters/tests/gtest_context_race.cpp
@ -1,6 +1,7 @@
 #include <Interpreters/Context.h>
 #include <Common/tests/gtest_global_context.h>
 #include <gtest/gtest.h>
+#include <thread>

 using namespace DB;

--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
@ -64,20 +64,22 @@ inline size_t BSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
    /// Optimization by caching the order of fields (which is almost always the same)
    /// and a quick check to match the next expected field, instead of searching the hash table.

-    if (prev_positions.size() > key_index && prev_positions[key_index] && name == prev_positions[key_index]->getKey())
+    if (prev_positions.size() > key_index
+        && prev_positions[key_index] != Block::NameMap::const_iterator{}
+        && name == prev_positions[key_index]->first)
    {
-        return prev_positions[key_index]->getMapped();
+        return prev_positions[key_index]->second;
    }
    else
    {
-        auto * it = name_map.find(name);
+        const auto it = name_map.find(name);

-        if (it)
+        if (it != name_map.end())
        {
            if (key_index < prev_positions.size())
                prev_positions[key_index] = it;

-            return it->getMapped();
+            return it->second;
        }
        else
            return UNKNOWN_FIELD;
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h
@ -91,7 +91,7 @@ private:
    Block::NameMap name_map;

    /// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
-    std::vector<Block::NameMap::LookupResult> prev_positions;
+    std::vector<Block::NameMap::const_iterator> prev_positions;

    DataTypes types;

--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
@ -128,7 +128,7 @@ Chunk JSONColumnsBlockInputFormatBase::generate()
        {
            /// Check if this name appears in header. If no, skip this column or throw
            /// an exception according to setting input_format_skip_unknown_fields
-            if (!name_to_index.has(*column_name))
+            if (name_to_index.find(*column_name) == name_to_index.end())
            {
                if (!format_settings.skip_unknown_fields)
                    throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column found in input data: {}", *column_name);
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@ -71,21 +71,20 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(StringRef name, size_t key_
    /// and a quick check to match the next expected field, instead of searching the hash table.

    if (prev_positions.size() > key_index
-        && prev_positions[key_index]
-        && name == prev_positions[key_index]->getKey())
+        && prev_positions[key_index] != Block::NameMap::const_iterator{}
+        && name == prev_positions[key_index]->first)
    {
-        return prev_positions[key_index]->getMapped();
+        return prev_positions[key_index]->second;
    }
    else
    {
-        auto * it = name_map.find(name);
-
-        if (it)
+        const auto it = name_map.find(name);
+        if (it != name_map.end())
        {
            if (key_index < prev_positions.size())
                prev_positions[key_index] = it;

-            return it->getMapped();
+            return it->second;
        }
        else
            return UNKNOWN_FIELD;
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@ -71,11 +71,10 @@ private:
    /// for row like {..., "non-nullable column name" : null, ...}

    /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map.
-    using NameMap = HashMap<StringRef, size_t, StringRefHash>;
-    NameMap name_map;
+    Block::NameMap name_map;

    /// Cached search results for previous row (keyed as index in JSON object) - used as a hint.
-    std::vector<NameMap::LookupResult> prev_positions;
+    std::vector<Block::NameMap::const_iterator> prev_positions;

    bool allow_new_rows = true;

--- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h
+++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Core/QueryProcessingStage.h>
+#include <Core/UUID.h>
 #include <Parsers/IAST_fwd.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/ResizeProcessor.h>
--- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
+++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
@ -42,6 +42,10 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
    if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication)
        return false;

+    // Currently projections don't support settings which implicitly modify aggregate functions.
+    if (reading->getContext()->getSettingsRef().aggregate_functions_null_for_empty)
+        return false;
+
    return true;
 }

--- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
+++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp
@ -11,10 +11,11 @@
 #include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
 #include <Processors/Formats/Impl/ArrowColumnToCHColumn.h>
 #include <Formats/FormatFactory.h>
-#include <boost/algorithm/string/case_conv.hpp>
-#include <parquet/arrow/reader.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnNullable.h>
+#include <IO/ReadHelpers.h>
+#include <boost/algorithm/string/case_conv.hpp>
+#include <parquet/arrow/reader.h>
 #include <ranges>

 namespace fs = std::filesystem;
--- a/src/Storages/DataLakes/HudiMetadataParser.cpp
+++ b/src/Storages/DataLakes/HudiMetadataParser.cpp
@ -1,6 +1,7 @@
 #include <Storages/DataLakes/HudiMetadataParser.h>
 #include <Common/logger_useful.h>
 #include <ranges>
+#include <base/find_symbols.h>
 #include <Poco/String.h>
 #include "config.h"
 #include <filesystem>
--- a/src/Storages/DataLakes/IcebergMetadataParser.cpp
+++ b/src/Storages/DataLakes/IcebergMetadataParser.cpp
@ -12,6 +12,7 @@
 #include <Storages/StorageS3.h>
 #include <Processors/Formats/Impl/AvroRowInputFormat.h>
 #include <Formats/FormatFactory.h>
+#include <IO/ReadHelpers.h>

 #include <Poco/JSON/Array.h>
 #include <Poco/JSON/Object.h>
--- a/src/Storages/StorageMongoDB.h
+++ b/src/Storages/StorageMongoDB.h
@ -7,7 +7,7 @@
 namespace DB
 {
 /* Implements storage in the MongoDB database.
- * Use ENGINE = mysql(host_port, database_name, table_name, user_name, password)
+ * Use ENGINE = MongoDB(host:port, database, collection, user, password [, options]);
 * Read only.
 */

--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@ -16,6 +16,7 @@
 #include <Poco/URI.h>
 #include <IO/S3/getObjectInfo.h>
 #include <IO/CompressionMethod.h>
+#include <IO/SeekableReadBuffer.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/threadPoolCallbackRunner.h>
 #include <Storages/Cache/SchemaCache.h>
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@ -530,6 +530,7 @@ class SettingsRandomizer:
        "max_threads": lambda: random.randint(1, 64),
        "optimize_or_like_chain": lambda: random.randint(0, 1),
        "optimize_read_in_order": lambda: random.randint(0, 1),
+        "enable_multiple_prewhere_read_steps": lambda: random.randint(0, 1),
        "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
        "optimize_aggregation_in_order": lambda: random.randint(0, 1),
        "aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000),
--- a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference
+++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference
@ -0,0 +1 @@
+1554690688
--- a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql
+++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql
@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t1;
+
+CREATE TABLE t1 (c0 Int32, PRIMARY KEY (c0)) ENGINE=MergeTree;
+INSERT INTO t1 VALUES (1554690688);
+
+SELECT MIN(t1.c0) FROM t1 SETTINGS aggregate_functions_null_for_empty = 1;
+
+DROP TABLE IF EXISTS t1;
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
@ -292,6 +292,35 @@ SELECT
    x;
 {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'}
 -- { echoOn }
+
+SET extract_kvp_max_pairs_per_row = 2;
+-- Should be allowed because it no longer exceeds the max number of pairs
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+{'key1':'value1','key2':'value2'}
+SET extract_kvp_max_pairs_per_row = 0;
+-- Should be allowed because max pairs per row is set to 0 (unlimited)
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+{'key1':'value1','key2':'value2'}
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH
    extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map,
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
@ -414,7 +414,49 @@ WITH
 SELECT
    x; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}

+-- Should fail because it exceeds the max number of pairs
+SET extract_kvp_max_pairs_per_row = 1;
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x; -- {serverError LIMIT_EXCEEDED}
+
 -- { echoOn }
+
+SET extract_kvp_max_pairs_per_row = 2;
+-- Should be allowed because it no longer exceeds the max number of pairs
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+
+SET extract_kvp_max_pairs_per_row = 0;
+-- Should be allowed because max pairs per row is set to 0 (unlimited)
+-- expected output: {'key1':'value1','key2':'value2'}
+WITH
+    extractKeyValuePairs('key1:value1,key2:value2') AS s_map,
+    CAST(
+            arrayMap(
+                    (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+                ),
+            'Map(String,String)'
+        ) AS x
+SELECT
+    x;
+
 -- should not fail because pair delimiters contains 8 characters, which is within the limit
 WITH
    extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map,
--- a/tests/queries/0_stateless/02736_bit_count_big_int.reference
+++ b/tests/queries/0_stateless/02736_bit_count_big_int.reference
@ -0,0 +1,13 @@
+128
+256
+128
+256
+127
+255
+126
+255
+64
+UInt8
+UInt16
+UInt8
+UInt16
--- a/tests/queries/0_stateless/02736_bit_count_big_int.sql
+++ b/tests/queries/0_stateless/02736_bit_count_big_int.sql
@ -0,0 +1,19 @@
+SELECT bitCount(CAST(-1 AS UInt128));
+SELECT bitCount(CAST(-1 AS UInt256));
+
+SELECT bitCount(CAST(-1 AS Int128));
+SELECT bitCount(CAST(-1 AS Int256));
+
+SELECT bitCount(CAST(-1 AS UInt128) - 1);
+SELECT bitCount(CAST(-1 AS UInt256) - 2);
+
+SELECT bitCount(CAST(-1 AS Int128) - 3);
+SELECT bitCount(CAST(-1 AS Int256) - 4);
+
+SELECT bitCount(CAST(0xFFFFFFFFFFFFFFFF AS Int256));
+
+SELECT toTypeName(bitCount(1::UInt128));
+SELECT toTypeName(bitCount(1::UInt256));
+
+SELECT toTypeName(bitCount(1::Int128));
+SELECT toTypeName(bitCount(1::Int256));