Mirror of https://github.com/ClickHouse/ClickHouse.git
Synced 2024-09-22 01:30:51 +00:00

Commit 70087bc959: Merge branch 'master' into improve_access_type
10  .github/workflows/master.yml (vendored)

@@ -149,7 +149,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH"
   SplitBuildSmokeTest:
     needs: [BuilderDebSplitted]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Set envs
@@ -316,7 +315,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinRelease:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -362,7 +360,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinGCC:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -636,7 +633,6 @@ jobs:
 ##########################################################################################
   BuilderDebSplitted:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -682,7 +678,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinTidy:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -728,7 +723,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -774,7 +768,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -820,7 +813,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -866,7 +858,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -912,7 +903,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinPPC64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -115,7 +115,7 @@ function run_tests()
     fi

     set +e
-    clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
+    clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
         --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
         "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

@@ -0,0 +1,48 @@
---
toc_priority: 108
---

# groupArraySorted {#groupArraySorted}

Returns an array with the first N items in ascending order.

``` sql
groupArraySorted(N)(column)
```

**Parameters**

- `N` – The number of elements to return. If the parameter is omitted, the default value 10 is used.

**Arguments**

- `column` – The value.
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.

**Example**

Gets the first 10 numbers:

``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9]        │
└──────────────────────────────┘
```

Or the last 10:

``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90]              │
└──────────────────────────────────────────────┘
```
@@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions:
 - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
 - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
 - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
+- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md)
 - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
 - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
 - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
147  src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp (new file)

@@ -0,0 +1,147 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>


static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;


namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}


namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
    using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};

template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
    : public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
    using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
        AggregateFunctionGroupArraySorted;
    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};

template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
    return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}

template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
    WhichDataType which(argument_types[0]);
    FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
    DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
    DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
    DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
    DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC

    if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
    {
        return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
            threshold, argument_types, params));
    }
    else
    {
        return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
            threshold, argument_types, params));
    }
}


AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
    UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;

    if (params.size() == 1)
    {
        UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

        if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
            throw Exception(
                "Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        if (k == 0)
            throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
    }
    else if (!params.empty())
    {
        throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }

    if (argument_types.size() == 2)
    {
        if (isNumber(argument_types[1]))
        {
#define DISPATCH2(A, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
            WhichDataType which(argument_types[1]);
            FOR_NUMERIC_TYPES(DISPATCH)
            DISPATCH2(Enum8, Int8)
            DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
            throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
        else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
        }
        else
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
        }
    }
    else if (argument_types.size() == 1)
    {
        return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
    }
    else
    {
        throw Exception(
            "Aggregate function " + name + " requires one or two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }
}
}

void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
    factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}
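The factory above picks a template instantiation at runtime with an X-macro: one `#define` expands into a chain of `if (which.idx == TypeIndex::...)` checks, one per supported type. A minimal standalone sketch of that dispatch pattern follows; the enum and `FOR_NUMERIC_TYPES` here are toy stand-ins for the ClickHouse definitions, not the real ones.

```cpp
#include <iostream>
#include <string>

// Toy stand-ins for ClickHouse's TypeIndex / WhichDataType machinery.
enum class TypeIndex { UInt8, Int32, Float64, String };
struct WhichDataType { TypeIndex idx; };

// Applies a macro to every supported numeric type, so one #define
// expands into a chain of "if (which.idx == ...)" checks.
#define FOR_NUMERIC_TYPES(M) M(UInt8) M(Int32) M(Float64)

std::string describe(WhichDataType which)
{
#define DISPATCH(A) \
    if (which.idx == TypeIndex::A) \
        return "numeric type " #A;
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    return "non-numeric type";
}

int main()
{
    std::cout << describe({TypeIndex::Int32}) << '\n';   // numeric type Int32
    std::cout << describe({TypeIndex::String}) << '\n';  // non-numeric type
}
```

The payoff is that each branch returns a fully monomorphized aggregate function, so the per-row hot path has no virtual dispatch on the value type.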
310  src/AggregateFunctions/AggregateFunctionGroupArraySorted.h (new file)

@@ -0,0 +1,310 @@
#pragma once

#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>

#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>

namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
    if constexpr (std::is_same_v<TColumn, StringRef>)
    {
        if constexpr (is_plain)
        {
            StringRef str = column->getDataAt(row);
            auto ptr = arena->alloc(str.size);
            std::copy(str.data, str.data + str.size, ptr);
            return StringRef(ptr, str.size);
        }
        else
        {
            const char * begin = nullptr;
            return column->serializeValueIntoArena(row, *arena, begin);
        }
    }
    else
    {
        if constexpr (std::is_same_v<TColumn, UInt64>)
            return column->getUInt(row);
        else
            return column->getInt(row);
    }
}

template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
{
    for (int i = 0; i < threshold; i++)
    {
        results[i] = 0;
    }

    threshold = std::min(num_elements, threshold);
    int current_max = 0;
    int cur;
    int z;
    for (int i = 0; i < num_elements; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        // Starting from the highest stored values, look for the first one immediately lower than the given element
        for (cur = current_max; cur > 0; cur--)
        {
            if (data[i] > data[results[cur - 1]])
                break;
        }

        if (cur < threshold)
        {
            // Move all the higher values one position to the right
            for (z = std::min(threshold - 1, current_max); z > cur; z--)
                results[z] = results[z - 1];

            if (current_max < threshold)
                ++current_max;

            // Insert the element into the given position
            results[cur] = i;
        }
    }

    return current_max;
}

template <typename T>
struct SortableItem
{
    T a;
    size_t b;
    bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};

template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
    const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
    std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
    size_t num_elements_filtered = 0;

    for (size_t i = 0; i < num_elements; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        dataIndexed.data()[num_elements_filtered].a = data[i];
        dataIndexed.data()[num_elements_filtered].b = i;
        num_elements_filtered++;
    }

    threshold = std::min(num_elements_filtered, threshold);

    std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
    std::sort(dataIndexed.data(), dataIndexed.data() + threshold);

    for (size_t i = 0; i < threshold; i++)
    {
        results[i] = dataIndexed[i].b;
    }

    return threshold;
}

static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;

template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
    if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
    {
        if (filter != nullptr)
            return getFirstNElements_low_threshold(data, num_elements, threshold, results, filter);
        else
            return getFirstNElements_low_threshold(data, num_elements, threshold, results);
    }
    else
    {
        if (filter != nullptr)
            return getFirstNElements_high_threshold(data, num_elements, threshold, results, filter);
        else
            return getFirstNElements_high_threshold(data, num_elements, threshold, results);
    }
}

template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
                                              AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
                                              AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
    using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
    using Base = IAggregateFunctionDataHelper<
        AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
        AggregateFunctionGroupArraySorted>;

    UInt64 threshold;
    DataTypePtr & input_data_type;
    mutable std::mutex mutex;

    static void deserializeAndInsert(StringRef str, IColumn & data_to);

public:
    AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
        : IAggregateFunctionDataHelper<
            AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
            AggregateFunctionGroupArraySorted>(argument_types_, params)
        , threshold(threshold_)
        , input_data_type(this->argument_types[0])
    {
    }

    void create(AggregateDataPtr place) const override
    {
        Base::create(place);
        this->data(place).threshold = threshold;
    }

    String getName() const override { return "groupArraySorted"; }

    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }

    bool allocatesMemoryInArena() const override
    {
        if constexpr (std::is_same_v<TColumnA, StringRef>)
            return true;
        else
            return false;
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        State & data = this->data(place);
        if constexpr (use_column_b)
        {
            data.add(
                readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
        }
        else
        {
            data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
        }
    }

    template <typename TColumn, bool is_plain, typename TFunc>
    void
    forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
    {
        const TColumn * values = nullptr;
        std::unique_ptr<std::vector<TColumn>> values_vector;
        std::vector<size_t> best_rows(threshold);

        if constexpr (std::is_same_v<TColumn, StringRef>)
        {
            values_vector.reset(new std::vector<TColumn>(batch_size));
            for (size_t i = 0; i < batch_size; i++)
                (*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
            values = (*values_vector).data();
        }
        else
        {
            const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
            values = column.getData().data();
        }

        const UInt8 * filter = nullptr;
        StringRef refFilter;

        if (if_argument_pos >= 0)
        {
            refFilter = columns[if_argument_pos]->getRawData();
            filter = reinterpret_cast<const UInt8 *>(refFilter.data);
        }

        size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
        for (size_t i = 0; i < num_elements; i++)
        {
            func(best_rows[i], values);
        }
    }

    void addBatchSinglePlace(
        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
    {
        State & data = this->data(place);

        if constexpr (use_column_b)
        {
            forFirstRows<TColumnB, is_plain_b>(
                batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
                {
                    data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
                });
        }
        else
        {
            forFirstRows<TColumnA, is_plain_a>(
                batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
                {
                    data.add(values[row]);
                });
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
    }

    void
    deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).deserialize(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
    {
        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        auto & values = this->data(place).values;
        offsets_to.push_back(offsets_to.back() + values.size());

        IColumn & data_to = arr_to.getData();
        for (auto value : values)
        {
            if constexpr (std::is_same_v<TColumnA, StringRef>)
            {
                auto str = State::itemValue(value);
                if constexpr (is_plain_a)
                {
                    data_to.insertData(str.data, str.size);
                }
                else
                {
                    data_to.deserializeAndInsertFromArena(str.data);
                }
            }
            else
            {
                data_to.insert(State::itemValue(value));
            }
        }
    }
};
}
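`getFirstNElements_high_threshold` above selects the smallest `threshold` items by pairing each value with its row index, partitioning with `std::nth_element` in O(N), and then sorting only the selected prefix. A minimal standalone sketch of that selection idea (names here are illustrative, not taken from the ClickHouse sources):

```cpp
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// Return the indices of the n smallest values, in ascending order of value.
// nth_element partitions in O(N); the final sort touches only n items.
std::vector<size_t> smallestN(const std::vector<int> & data, size_t n)
{
    n = std::min(n, data.size());
    std::vector<std::pair<int, size_t>> indexed(data.size());
    for (size_t i = 0; i < data.size(); ++i)
        indexed[i] = {data[i], i};

    std::nth_element(indexed.begin(), indexed.begin() + n, indexed.end());
    std::sort(indexed.begin(), indexed.begin() + n);

    std::vector<size_t> rows(n);
    for (size_t i = 0; i < n; ++i)
        rows[i] = indexed[i].second;
    return rows;
}

int main()
{
    std::vector<int> data{5, 1, 4, 1, 5, 9, 2, 6};
    for (size_t row : smallestN(data, 3))
        std::cout << row << ' ';  // indices of the 3 smallest values: 1 3 6
}
```

Returning row indices rather than values is what lets the aggregate read back only the winning rows of the other column in `forFirstRows`.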
162  src/AggregateFunctions/AggregateFunctionGroupArraySortedData.h (new file)

@@ -0,0 +1,162 @@
#pragma once

#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>


static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;

namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        writeVarInt(item, buf);
    }
    else
    {
        writeVarUInt(item, buf);
    }
}

static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
    writeBinary(item, buf);
}

template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        DB::Int64 val;
        readVarT(val, buf);
        item = val;
    }
    else
    {
        DB::UInt64 val;
        readVarT(val, buf);
        item = val;
    }
}

static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
    item = readStringBinaryInto(*arena, buf);
}

template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
    typedef typename Storage::value_type ValueType;
    AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }

    virtual ~AggregateFunctionGroupArraySortedDataBase() { }
    inline void narrowDown()
    {
        while (values.size() > threshold)
            values.erase(--values.end());
    }

    void merge(const AggregateFunctionGroupArraySortedDataBase & other)
    {
        values.merge(Storage(other.values));
        narrowDown();
    }

    void serialize(WriteBuffer & buf) const
    {
        writeOneItem(buf, UInt64(values.size()));
        for (auto value : values)
        {
            serializeItem(buf, value);
        }
    }

    virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
    virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;

    void deserialize(ReadBuffer & buf, Arena * arena)
    {
        values.clear();
        UInt64 length;
        readOneItem(buf, nullptr, length);

        while (length--)
        {
            values.insert(deserializeItem(buf, arena));
        }

        narrowDown();
    }

    UInt64 threshold;
    Storage values;
};

template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
    using Base::Base;

    void add(T item, TIndex weight)
    {
        Base::values.insert({weight, item});
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
    {
        writeOneItem(buf, value.first);
        writeOneItem(buf, value.second);
    }

    virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        TIndex first;
        T second;
        readOneItem(buf, arena, first);
        readOneItem(buf, arena, second);

        return {first, second};
    }

    static T itemValue(typename Base::ValueType & value) { return value.second; }
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
    using Base::Base;

    void add(T item)
    {
        Base::values.insert(item);
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }

    typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        T value;
        readOneItem(buf, arena, value);
        return value;
    }

    static T itemValue(typename Base::ValueType & value) { return value; }
};
}
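The state above keeps at most `threshold` elements by inserting into an ordered container and trimming from the largest end (`narrowDown`), so the smallest N survive no matter how many rows are added. A minimal standalone sketch of that bounded-multiset idea, not the ClickHouse class itself:

```cpp
#include <iostream>
#include <set>

// Keep only the `limit` smallest items seen so far: insert, then trim the
// largest element while over capacity, mirroring narrowDown() above.
template <typename T>
struct BoundedMultiset
{
    size_t limit;
    std::multiset<T> values;

    void add(T item)
    {
        values.insert(item);
        while (values.size() > limit)
            values.erase(--values.end());
    }
};

int main()
{
    BoundedMultiset<int> top3{3, {}};
    for (int v : {7, 2, 9, 2, 5, 1})
        top3.add(v);
    for (int v : top3.values)
        std::cout << v << ' ';  // 1 2 2
}
```

A `std::multimap` keyed by the sort expression gives the two-argument variant the same behavior, with the payload value carried alongside the key.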
@@ -59,6 +59,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
 void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
 void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
 void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
+void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);

 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@@ -130,6 +131,7 @@ void registerAggregateFunctions()
     registerAggregateFunctionIntervalLengthSum(factory);
     registerAggregateFunctionExponentialMovingAverage(factory);
     registerAggregateFunctionSparkbar(factory);
+    registerAggregateFunctionGroupArraySorted(factory);

     registerWindowFunctions(factory);
 }
@@ -35,10 +35,10 @@ public:
     {}

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    Exception(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    Exception(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
+    {
+    }

     struct CreateFromPocoTag {};
     struct CreateFromSTDTag {};
@@ -52,10 +52,10 @@ public:
     const char * what() const throw() override { return message().data(); }

     /// Add something to the existing message.
-    template <typename ...Args>
-    void addMessage(const std::string& format, Args&&... args)
-    {
-        extendedMessage(fmt::format(fmt::runtime(format), std::forward<Args>(args)...));
-    }
+    template <typename... Args>
+    void addMessage(fmt::format_string<Args...> format, Args &&... args)
+    {
+        extendedMessage(fmt::format(format, std::forward<Args>(args)...));
+    }

     void addMessage(const std::string& message)
@@ -117,10 +117,10 @@ public:
     ParsingException(int code, const std::string & message);

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    ParsingException(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    ParsingException(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(code, fmt, std::forward<Args>(args)...)
+    {
+    }

     std::string displayText() const
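The switch from `const std::string &` to `fmt::format_string<Args...>` moves format-string checking to compile time: a `{}` with no matching argument becomes a build error instead of a runtime throw, which is exactly the class of bug fixed below in DatabaseReplicatedDDLWorker.cpp. A small sketch of the pattern, assuming fmt version 8 or later:

```cpp
#include <fmt/format.h>
#include <iostream>
#include <utility>

// With fmt::format_string, argument count/type mismatches in a literal
// format string fail to compile rather than throwing at runtime.
template <typename... Args>
void report(fmt::format_string<Args...> fmt, Args &&... args)
{
    std::cout << fmt::format(fmt, std::forward<Args>(args)...) << '\n';
}

int main()
{
    report("Entry {} failed: code {}", "query-1", 42);  // OK
    // report("Entry {} failed: code {}", 42);  // would not compile: missing argument
}
```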
@@ -67,6 +67,9 @@ struct FixedHashTableCalculatedSize
 {
     size_t getSize(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
     {
+        if (!buf)
+            return 0;
+
         size_t res = 0;
         for (const Cell * end = buf + num_cells; buf != end; ++buf)
             if (!buf->isZero(state))
@@ -76,6 +79,9 @@ struct FixedHashTableCalculatedSize

     bool isEmpty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
     {
+        if (!buf)
+            return true;
+
         for (const Cell * end = buf + num_cells; buf != end; ++buf)
             if (!buf->isZero(state))
                 return false;
@@ -94,6 +94,12 @@ public:

     TwoLevelHashTable() = default;

+    explicit TwoLevelHashTable(size_t size_hint)
+    {
+        for (auto & impl : impls)
+            impl.reserve(size_hint / NUM_BUCKETS);
+    }
+
     /// Copy the data from another (normal) hash table. It should have the same hash function.
     template <typename Source>
     explicit TwoLevelHashTable(const Source & src)
@@ -285,6 +285,9 @@
     \
     M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
     \
+    M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
+    M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
+    \
     M(MergeTreeMetadataCacheGet, "Number of rocksdb reads(used for merge tree metadata cache)") \
     M(MergeTreeMetadataCachePut, "Number of rocksdb puts(used for merge tree metadata cache)") \
     M(MergeTreeMetadataCacheDelete, "Number of rocksdb deletes(used for merge tree metadata cache)") \
46  src/Common/RangeGenerator.h (new file)

@@ -0,0 +1,46 @@
#pragma once

#include <optional>
#include <cmath>

namespace DB
{

class RangeGenerator
{
public:
    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
        : from(range_start), range_step(range_step_), total_size(total_size_)
    {
    }

    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }

    using Range = std::pair<size_t, size_t>;

    // return upper exclusive range of values, i.e. [from_range, to_range>
    std::optional<Range> nextRange()
    {
        if (from >= total_size)
        {
            return std::nullopt;
        }

        auto to = from + range_step;
        if (to >= total_size)
        {
            to = total_size;
        }

        Range range{from, to};
        from = to;
        return range;
    }

private:
    size_t from;
    size_t range_step;
    size_t total_size;
};

}
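A small usage sketch of the generator above, splitting a 10-byte object into half-open 4-byte ranges. (Using it to size chunked remote reads is a plausible purpose, but the diff itself does not say; that context is an assumption.)

```cpp
#include <iostream>
#include <Common/RangeGenerator.h>  // the header added above; include path assumed

int main()
{
    DB::RangeGenerator gen(10, 4);  // total_size = 10, range_step = 4
    while (auto range = gen.nextRange())
        std::cout << '[' << range->first << ", " << range->second << ") ";
    // prints: [0, 4) [4, 8) [8, 10)
}
```

Note that the final range is clamped to `total_size`, so callers never receive an out-of-bounds upper bound.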
178  src/Common/format.h (new file)

@@ -0,0 +1,178 @@
#pragma once

#include <base/types.h>
#include <Common/Exception.h>
#include <Common/PODArray.h>
#include <Common/StringUtils/StringUtils.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace Format
{
    using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;

    static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
    {
        res = 0;
        for (UInt64 pos = l; pos < r; ++pos)
        {
            if (!isNumericASCII(description[pos]))
                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
            res = res * 10 + description[pos] - '0';
            if (res >= argument_number)
                throw Exception(
                    "Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS);
        }
    }

    static inline void init(
        const String & pattern,
        size_t argument_number,
        const std::vector<std::optional<String>> & constant_strings,
        IndexPositions & index_positions,
        std::vector<String> & substrings)
    {
        /// Is current position after open curly brace.
        bool is_open_curly = false;
        /// The position of last open token.
        size_t last_open = -1;

        /// Is formatting in a plain {} token.
        std::optional<bool> is_plain_numbering;
        UInt64 index_if_plain = 0;

        /// Left position of adding substrings, just to the closed brace position or the start of the string.
        /// Invariant --- the start of substring is in this position.
        size_t start_pos = 0;

        /// A flag to decide whether we should glue the constant strings.
        bool glue_to_next = false;

        /// Handling double braces (escaping).
        auto double_brace_removal = [](String & str)
        {
            size_t i = 0;
            bool should_delete = true;
            str.erase(
                std::remove_if(
                    str.begin(),
                    str.end(),
                    [&i, &should_delete, &str](char)
                    {
                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
                        ++i;
                        if (is_double_brace && should_delete)
                        {
                            should_delete = false;
                            return true;
                        }
                        should_delete = true;
                        return false;
                    }),
                str.end());
        };

        index_positions.emplace_back();

        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] == '{')
            {
                /// Escaping handling
                /// It is safe to access because of null termination
                if (pattern[i + 1] == '{')
                {
                    ++i;
                    continue;
                }

                if (is_open_curly)
                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);

                String to_add = String(pattern.data() + start_pos, i - start_pos);
                double_brace_removal(to_add);
                if (!glue_to_next)
                    substrings.emplace_back(to_add);
                else
                    substrings.back() += to_add;

                glue_to_next = false;

                is_open_curly = true;
                last_open = i + 1;
            }
            else if (pattern[i] == '}')
            {
                if (pattern[i + 1] == '}')
                {
                    ++i;
                    continue;
                }

                if (!is_open_curly)
                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);

                is_open_curly = false;

                if (last_open == i)
                {
                    if (is_plain_numbering && !*is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = true;
                    if (index_if_plain >= argument_number)
                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
                    index_positions.back() = index_if_plain++;
                }
                else
                {
                    if (is_plain_numbering && *is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = false;

                    UInt64 arg;
                    parseNumber(pattern, last_open, i, arg, argument_number);

                    if (arg >= argument_number)
                        throw Exception(
                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);

                    index_positions.back() = arg;
                }

                if (!constant_strings.empty() && constant_strings[index_positions.back()])
                {
                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
                    glue_to_next = true;
                    substrings.back() += *constant_strings[index_positions.back()];
                }
                else
                    index_positions.emplace_back(); /// Otherwise we commit arg number and proceed.

                start_pos = i + 1;
            }
        }

        if (is_open_curly)
            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);

        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
        double_brace_removal(to_add);

        if (!glue_to_next)
            substrings.emplace_back(to_add);
        else
            substrings.back() += to_add;

        index_positions.pop_back();
    }
}

}
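One rule the parser above enforces is that a pattern may use automatic numbering (`{} {}`) or manual numbering (`{1} {0}`), but never both, with `{{` and `}}` as escapes. A deliberately simplified standalone checker for just that rule follows; it is a sketch of the semantics, not a reimplementation of `Format::init`:

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

// Reject patterns that mix automatic ("{}") and manual ("{0}") numbering,
// mirroring the is_plain_numbering checks in Format::init above.
void checkNumbering(const std::string & pattern)
{
    std::optional<bool> is_plain_numbering;
    for (size_t i = 0; i < pattern.size(); ++i)
    {
        if (pattern[i] != '{')
            continue;
        if (i + 1 < pattern.size() && pattern[i + 1] == '{')  // "{{" is an escape
        {
            ++i;
            continue;
        }
        bool plain = i + 1 < pattern.size() && pattern[i + 1] == '}';
        if (is_plain_numbering && *is_plain_numbering != plain)
            throw std::invalid_argument("Cannot switch from automatic field numbering to manual field specification");
        is_plain_numbering = plain;
    }
}

int main()
{
    checkNumbering("{} and {}");       // OK: automatic
    checkNumbering("{1} before {0}");  // OK: manual
    try
    {
        checkNumbering("{} mixed {0}");  // throws
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << e.what() << '\n';
    }
}
```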
@@ -500,6 +500,10 @@ class IColumn;
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
     M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     \
+    M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
+    M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
+    M(UInt64, max_size_to_preallocate_for_aggregation, 10'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
+    \
     /** Experimental feature for moving data between shards. */ \
     \
     M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
@@ -6,6 +6,7 @@
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterCreateQuery.h>
+#include <Interpreters/ApplyWithSubqueryVisitor.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ParserCreateQuery.h>
@@ -55,6 +56,9 @@ std::pair<String, StoragePtr> createTableFromAST(
     ast_create_query.attach = true;
     ast_create_query.setDatabase(database_name);

+    if (ast_create_query.select && ast_create_query.isView())
+        ApplyWithSubqueryVisitor().visit(*ast_create_query.select);
+
     if (ast_create_query.as_table_function)
     {
         const auto & factory = TableFunctionFactory::instance();
@@ -179,8 +179,12 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr

     if (!task->was_executed)
     {
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
-                        task->execution_status.code, task->execution_status.message);
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Entry {} was executed, but was not committed: code {}: {}",
+            task->entry_name,
+            task->execution_status.code,
+            task->execution_status.message);
     }

     try_node->setAlreadyRemoved();
@@ -50,7 +50,7 @@ namespace
     {
         if (!qualified_name.database.empty())
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Dictionary source of type {} specifies a schema but schema is not supported by {}-driver",
+                "Dictionary source specifies a schema but schema is not supported by {}-driver",
                 bridge_.getName());
     }

@@ -392,8 +392,13 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
         if (bytes_to_predownload)
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
-                "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}",
-                file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof());
+                "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, "
+                "eof: {}",
+                bytes_to_predownload,
+                file_segment->range().toString(),
+                file_segment->getDownloadOffset(),
+                file_offset_of_buffer_end,
+                implementation_buffer->eof());

         auto result = implementation_buffer->hasPendingData();

@@ -44,7 +44,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
     {
         return std::make_unique<ReadBufferFromS3>(
             client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
-            settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
+            settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
     };

     if (with_cache)
@@ -85,9 +85,12 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
     else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
     {
         if (is_server)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
-                path.string());
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
+                default_schema_directory(),
+                path.string(),
+                default_schema_directory());
         path = default_schema_directory_path / path;
         schema_path = path.filename();
         schema_directory = path.parent_path() / "";
@@ -887,7 +887,7 @@ struct ConvertImplGenericToString
     const IColumn & col_from = *col_with_type_and_name.column;

     size_t size = col_from.size();
-    auto col_to = result_type->createColumn();
+    auto col_to = removeNullable(result_type)->createColumn();

     {
         ColumnStringHelpers::WriteHelper write_helper(
@@ -259,7 +259,7 @@ public:
             throw Exception(
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
-                arguments.size());
+                name, arguments.size());

         if (!isString(arguments[0]))
             throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
@ -181,9 +181,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
|
|||||||
// Default implementation for nulls returns null result for null arguments,
|
// Default implementation for nulls returns null result for null arguments,
|
||||||
// so the result type must be nullable.
|
// so the result type must be nullable.
|
||||||
if (!result_type->isNullable())
|
if (!result_type->isNullable())
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
throw Exception(
|
||||||
"Function {} with Null argument and default implementation for Nulls "
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
"is expected to return Nullable result, got {}", result_type->getName());
|
"Function {} with Null argument and default implementation for Nulls "
|
||||||
|
"is expected to return Nullable result, got {}",
|
||||||
|
getName(),
|
||||||
|
result_type->getName());
|
||||||
|
|
||||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||||
}
|
}
|
||||||
|
@ -231,7 +231,7 @@ private:
|
|||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||||
"Function {} decimal scale should have native UInt type. Actual {}",
|
"Function {} decimal scale should have native UInt type. Actual {}",
|
||||||
scale_argument.type->getName());
|
getName(), scale_argument.type->getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
scale = arguments[additional_argument_index].column->getUInt(0);
|
scale = arguments[additional_argument_index].column->getUInt(0);
|
||||||
|
@ -52,23 +52,21 @@ public:
|
|||||||
{
|
{
|
||||||
if (arguments.size() < 2)
|
if (arguments.size() < 2)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||||
+ ", should be at least 2.",
|
"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
|
||||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
getName(),
|
||||||
|
arguments.size());
|
||||||
if (arguments.size() > FormatImpl::argument_threshold)
|
|
||||||
throw Exception(
|
|
||||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
|
||||||
+ ", should be at most " + std::to_string(FormatImpl::argument_threshold),
|
|
||||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
||||||
|
|
||||||
for (const auto arg_idx : collections::range(0, arguments.size()))
|
for (const auto arg_idx : collections::range(0, arguments.size()))
|
||||||
{
|
{
|
||||||
const auto * arg = arguments[arg_idx].get();
|
const auto * arg = arguments[arg_idx].get();
|
||||||
if (!isStringOrFixedString(arg))
|
if (!isStringOrFixedString(arg))
|
||||||
throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function "
|
throw Exception(
|
||||||
+ getName(),
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
"Illegal type {} of argument {} of function {}",
|
||||||
|
arg->getName(),
|
||||||
|
arg_idx + 1,
|
||||||
|
getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_shared<DataTypeString>();
|
return std::make_shared<DataTypeString>();
|
||||||
@ -125,7 +123,7 @@ private:
|
|||||||
std::vector<const ColumnString::Chars *> data(num_arguments);
|
std::vector<const ColumnString::Chars *> data(num_arguments);
|
||||||
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
||||||
std::vector<size_t> fixed_string_sizes(num_arguments);
|
std::vector<size_t> fixed_string_sizes(num_arguments);
|
||||||
std::vector<String> constant_strings(num_arguments);
|
std::vector<std::optional<String>> constant_strings(num_arguments);
|
||||||
bool has_column_string = false;
|
bool has_column_string = false;
|
||||||
bool has_column_fixed_string = false;
|
bool has_column_fixed_string = false;
|
||||||
for (size_t i = 0; i < num_arguments; ++i)
|
for (size_t i = 0; i < num_arguments; ++i)
|
||||||
|
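The switch from std::vector<String> to std::vector<std::optional<String>> above lets the formatting code distinguish "argument i is not a constant column" from "argument i is a constant empty string", two states that a plain String cannot tell apart. A small illustration of the extra state:

    #include <cassert>
    #include <optional>
    #include <string>
    #include <vector>

    int main()
    {
        // With plain String, a non-constant column and a constant "" look identical.
        std::vector<std::string> plain(2);
        assert(plain[0] == plain[1]); // ambiguous: not constant? or constant ""?

        // With optional<String>, nullopt means "not a constant column",
        // while an engaged empty string means "constant empty string".
        std::vector<std::optional<std::string>> constant_strings(2);
        constant_strings[1] = ""; // argument 1 is a constant empty string

        assert(!constant_strings[0].has_value());                          // not constant
        assert(constant_strings[1].has_value() && constant_strings[1]->empty()); // constant ""
    }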
@@ -112,7 +112,7 @@ public:
               || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
             throw Exception(
                 ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {} of function {], must be Date or DateTime.",
+                "Illegal column {} of function {}, must be Date or DateTime.",
                 arguments[1].column->getName(),
                 getName());
 
@@ -45,25 +45,23 @@ public:
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        if (arguments.empty())
+        if (arguments.size() < 2)
             throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at least 1",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        if (arguments.size() > FormatImpl::argument_threshold)
-            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
+                getName(),
+                arguments.size());
 
         for (const auto arg_idx : collections::range(0, arguments.size()))
         {
             const auto * arg = arguments[arg_idx].get();
             if (!isStringOrFixedString(arg))
                 throw Exception(
-                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(),
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of argument {} of function {}",
+                    arg->getName(),
+                    arg_idx + 1,
+                    getName());
         }
 
         return std::make_shared<DataTypeString>();
@@ -84,7 +82,7 @@ public:
         std::vector<const ColumnString::Chars *> data(arguments.size() - 1);
         std::vector<const ColumnString::Offsets *> offsets(arguments.size() - 1);
         std::vector<size_t> fixed_string_sizes(arguments.size() - 1);
-        std::vector<String> constant_strings(arguments.size() - 1);
+        std::vector<std::optional<String>> constant_strings(arguments.size() - 1);
 
         bool has_column_string = false;
         bool has_column_fixed_string = false;
@@ -4,8 +4,10 @@
 #include <base/types.h>
 #include <Common/Exception.h>
 #include <Common/StringUtils/StringUtils.h>
+#include <Common/format.h>
 #include <Common/memcpySmall.h>
 
+
 #include <algorithm>
 #include <optional>
 #include <string>
@@ -15,15 +17,9 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
 
 struct FormatImpl
 {
-    static constexpr size_t small_argument_threshold = 1024;
-    static constexpr size_t argument_threshold = std::numeric_limits<UInt32>::max();
     static constexpr size_t right_padding = 15;
 
     template <typename... Args>
@@ -39,165 +35,10 @@ struct FormatImpl
         format<false, false>(std::forward<Args>(args)...);
     }
 
-    static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res)
-    {
-        res = 0;
-        for (UInt64 pos = l; pos < r; ++pos)
-        {
-            if (!isNumericASCII(description[pos]))
-                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
-            res = res * 10 + description[pos] - '0';
-            if (res >= argument_threshold)
-                throw Exception(
-                    "Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
-        }
-    }
-
-    static inline void init(
-        const String & pattern,
-        const std::vector<const ColumnString::Chars *> & data,
-        size_t argument_number,
-        const std::vector<String> & constant_strings,
-        UInt64 * index_positions_ptr,
-        std::vector<String> & substrings)
-    {
-        /// Is current position after open curly brace.
-        bool is_open_curly = false;
-        /// The position of last open token.
-        size_t last_open = -1;
-
-        /// Is formatting in a plain {} token.
-        std::optional<bool> is_plain_numbering;
-        UInt64 index_if_plain = 0;
-
-        /// Left position of adding substrings, just to the closed brace position or the start of the string.
-        /// Invariant --- the start of substring is in this position.
-        size_t start_pos = 0;
-
-        /// A flag to decide whether we should glue the constant strings.
-        bool glue_to_next = false;
-
-        /// Handling double braces (escaping).
-        auto double_brace_removal = [](String & str)
-        {
-            size_t i = 0;
-            bool should_delete = true;
-            str.erase(
-                std::remove_if(
-                    str.begin(),
-                    str.end(),
-                    [&i, &should_delete, &str](char)
-                    {
-                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
-                        ++i;
-                        if (is_double_brace && should_delete)
-                        {
-                            should_delete = false;
-                            return true;
-                        }
-                        should_delete = true;
-                        return false;
-                    }),
-                str.end());
-        };
-
-        for (size_t i = 0; i < pattern.size(); ++i)
-        {
-            if (pattern[i] == '{')
-            {
-                /// Escaping handling
-                /// It is safe to access because of null termination
-                if (pattern[i + 1] == '{')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (is_open_curly)
-                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                String to_add = String(pattern.data() + start_pos, i - start_pos);
-                double_brace_removal(to_add);
-                if (!glue_to_next)
-                    substrings.emplace_back(to_add);
-                else
-                    substrings.back() += to_add;
-
-                glue_to_next = false;
-
-                is_open_curly = true;
-                last_open = i + 1;
-            }
-            else if (pattern[i] == '}')
-            {
-                if (pattern[i + 1] == '}')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (!is_open_curly)
-                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                is_open_curly = false;
-
-                if (last_open == i)
-                {
-                    if (is_plain_numbering && !*is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = true;
-                    if (index_if_plain >= argument_number)
-                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
-                    *index_positions_ptr = index_if_plain++;
-                }
-                else
-                {
-                    if (is_plain_numbering && *is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = false;
-
-                    UInt64 arg;
-                    parseNumber(pattern, last_open, i, arg);
-
-                    if (arg >= argument_number)
-                        throw Exception(
-                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
-
-                    *index_positions_ptr = arg;
-                }
-
-                /// Constant string.
-                if (!data[*index_positions_ptr])
-                {
-                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
-                    glue_to_next = true;
-                    substrings.back() += constant_strings[*index_positions_ptr];
-                }
-                else
-                    ++index_positions_ptr; /// Otherwise we commit arg number and proceed.
-
-                start_pos = i + 1;
-            }
-        }
-
-        if (is_open_curly)
-            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
-
-        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
-        double_brace_removal(to_add);
-
-        if (!glue_to_next)
-            substrings.emplace_back(to_add);
-        else
-            substrings.back() += to_add;
-    }
-
     /// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string.
     /// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString.
     /// fixed_string_N for savings N to fixed strings.
-    /// constant_strings for constant strings. If data[i] is nullptr, than it is constant string.
+    /// constant_strings for constant strings. If data[i] is nullptr, it is constant string.
     /// res_data is result_data, res_offsets is offset result.
     /// input_rows_count is the number of rows processed.
    /// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
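For context on what the removed parser implemented (the logic now lives in Common/format.h as Format::init, per the include added above): plain `{}` placeholders are numbered automatically, `{N}` selects an argument explicitly, `{{` and `}}` escape literal braces, and mixing automatic with manual numbering in one pattern is rejected. A condensed, runnable sketch of just the numbering rules (a hypothetical helper; it deliberately omits the `{{`/`}}` escaping and constant-string gluing of the real code):

    #include <cassert>
    #include <optional>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Returns the argument index used by each placeholder, enforcing the
    // "automatic xor manual numbering" rule from the removed FormatImpl::init.
    std::vector<size_t> parsePlaceholders(const std::string & pattern, size_t argument_number)
    {
        std::vector<size_t> positions;
        std::optional<bool> is_plain_numbering;
        size_t index_if_plain = 0;

        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] != '{')
                continue;
            size_t close = pattern.find('}', i);
            if (close == std::string::npos)
                throw std::runtime_error("Last open curly brace is not closed");

            bool plain = (close == i + 1); // "{}" vs "{N}"
            if (is_plain_numbering && *is_plain_numbering != plain)
                throw std::runtime_error("Cannot switch from automatic field numbering to manual field specification");
            is_plain_numbering = plain;

            size_t arg = plain ? index_if_plain++ : std::stoul(pattern.substr(i + 1, close - i - 1));
            if (arg >= argument_number)
                throw std::runtime_error("Argument is too big for formatting");
            positions.push_back(arg);
            i = close;
        }
        return positions;
    }

    int main()
    {
        assert((parsePlaceholders("{} and {}", 2) == std::vector<size_t>{0, 1}));
        assert((parsePlaceholders("{1} {0}", 2) == std::vector<size_t>{1, 0}));
    }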
@@ -207,29 +48,22 @@ struct FormatImpl
         const std::vector<const ColumnString::Chars *> & data,
         const std::vector<const ColumnString::Offsets *> & offsets,
         [[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector<size_t> & fixed_string_N,
-        const std::vector<String> & constant_strings,
+        const std::vector<std::optional<String>> & constant_strings,
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets,
         size_t input_rows_count)
     {
         const size_t argument_number = offsets.size();
 
-        UInt64 small_index_positions_buffer[small_argument_threshold];
-        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices.
-        UInt64 * index_positions = small_index_positions_buffer;
-
-        std::unique_ptr<UInt64[]> big_index_positions_buffer;
-        if (argument_number > small_argument_threshold)
-        {
-            big_index_positions_buffer.reset(new UInt64[argument_number]);
-            index_positions = big_index_positions_buffer.get();
-        }
+        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this
+        /// array will be filled with [1, 3, 1, 0] but without constant string indices.
+        Format::IndexPositions index_positions;
 
         /// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation.
         /// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``].
         std::vector<String> substrings;
 
-        init(pattern, data, argument_number, constant_strings, index_positions, substrings);
+        Format::init(pattern, argument_number, constant_strings, index_positions, substrings);
 
         UInt64 final_size = 0;
 
@@ -271,7 +105,7 @@ struct FormatImpl
         for (size_t j = 1; j < substrings.size(); ++j)
         {
             UInt64 arg = index_positions[j - 1];
-            auto offset_ptr = offsets[arg];
+            const auto * offset_ptr = offsets[arg];
             UInt64 arg_offset = 0;
             UInt64 size = 0;
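Format::IndexPositions replaces the hand-rolled small-buffer optimization deleted above; presumably it is a container with inline storage (an assumption here, the type is defined in Common/format.h), so the small/large switch happens inside the container rather than at every call site. For reference, the removed pattern in isolation, as a runnable snippet:

    #include <cstdint>
    #include <memory>

    // The shape of the code this hunk removes: a fixed stack buffer with a
    // heap fallback, managed by hand at the call site.
    void oldStyle(size_t argument_number)
    {
        constexpr size_t small_argument_threshold = 1024;
        uint64_t small_buffer[small_argument_threshold];
        uint64_t * index_positions = small_buffer;

        std::unique_ptr<uint64_t[]> big_buffer;
        if (argument_number > small_argument_threshold)
        {
            big_buffer.reset(new uint64_t[argument_number]);
            index_positions = big_buffer.get();
        }
        // ... fill index_positions ...
        (void)index_positions;
    }

    int main()
    {
        oldStyle(16);   // stays on the stack
        oldStyle(2000); // falls back to the heap
    }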
@@ -237,7 +237,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
         while (!emergency_stop && !read_worker->cancel)
         {
             if (!read_worker->reader->next())
-                throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left);
 
             if (emergency_stop || read_worker->cancel)
                 break;
@@ -82,8 +82,8 @@ public:
         std::unique_ptr<ReadBufferFactory> reader_factory_,
         ThreadPool * pool,
         size_t max_working_readers,
-        WorkerSetup worker_setup = {},
-        WorkerCleanup worker_cleanup = {});
+        WorkerSetup worker_setup = [](ThreadStatus &){},
+        WorkerCleanup worker_cleanup = [](ThreadStatus &){});
 
     ~ParallelReadBuffer() override { finishAndWait(); }
 
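Why the `{}` to `[](ThreadStatus &){}` change in the defaults matters: a default-constructed std::function is empty, and invoking it throws std::bad_function_call, so a no-op lambda is the safe default when the callback is invoked unconditionally. A minimal demonstration (int stands in for ThreadStatus here):

    #include <functional>
    #include <iostream>

    int main()
    {
        std::function<void(int &)> empty_cb;               // like `WorkerSetup worker_setup = {}`
        std::function<void(int &)> noop_cb = [](int &) {}; // like the new default

        int state = 0;
        try
        {
            empty_cb(state); // throws: no target to call
        }
        catch (const std::bad_function_call &)
        {
            std::cout << "calling a default-constructed std::function throws\n";
        }

        noop_cb(state); // fine: does nothing
    }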
@@ -1,4 +1,5 @@
 #include <Common/config.h>
+#include "IO/S3Common.h"
 
 #if USE_AWS_S3
 
@@ -42,6 +43,7 @@ ReadBufferFromS3::ReadBufferFromS3(
     UInt64 max_single_read_retries_,
     const ReadSettings & settings_,
     bool use_external_buffer_,
+    size_t offset_,
     size_t read_until_position_,
     bool restricted_seek_)
     : SeekableReadBufferWithSize(nullptr, 0)
@@ -49,9 +51,10 @@ ReadBufferFromS3::ReadBufferFromS3(
     , bucket(bucket_)
     , key(key_)
     , max_single_read_retries(max_single_read_retries_)
+    , offset(offset_)
+    , read_until_position(read_until_position_)
     , read_settings(settings_)
     , use_external_buffer(use_external_buffer_)
-    , read_until_position(read_until_position_)
     , restricted_seek(restricted_seek_)
 {
 }
@@ -210,13 +213,14 @@ std::optional<size_t> ReadBufferFromS3::getTotalSize()
     if (file_size)
         return file_size;
 
-    Aws::S3::Model::HeadObjectRequest request;
-    request.SetBucket(bucket);
-    request.SetKey(key);
-
-    auto outcome = client_ptr->HeadObject(request);
-    auto head_result = outcome.GetResultWithOwnership();
-    file_size = head_result.GetContentLength();
+    auto object_size = S3::getObjectSize(client_ptr, bucket, key, false);
+
+    if (!object_size)
+    {
+        return std::nullopt;
+    }
+
+    file_size = object_size;
     return file_size;
 }
 
@@ -234,6 +238,11 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
     }
 }
 
+SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
+{
+    return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
+}
+
 std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
 {
     Aws::S3::Model::GetObjectRequest req;
@@ -272,6 +281,36 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
         throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
     }
 
+SeekableReadBufferPtr ReadBufferS3Factory::getReader()
+{
+    const auto next_range = range_generator.nextRange();
+    if (!next_range)
+    {
+        return nullptr;
+    }
+
+    auto reader = std::make_shared<ReadBufferFromS3>(
+        client_ptr,
+        bucket,
+        key,
+        s3_max_single_read_retries,
+        read_settings,
+        false /*use_external_buffer*/,
+        next_range->first,
+        next_range->second);
+    return reader;
+}
+
+off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
+{
+    range_generator = RangeGenerator{object_size, range_step, static_cast<size_t>(off)};
+    return off;
+}
+
+std::optional<size_t> ReadBufferS3Factory::getTotalSize()
+{
+    return object_size;
+}
 }
 
 #endif
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/RangeGenerator.h>
 #include <Common/config.h>
 
 #if USE_AWS_S3
@@ -7,6 +8,7 @@
 #include <memory>
 
 #include <IO/HTTPCommon.h>
+#include <IO/ParallelReadBuffer.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadSettings.h>
 #include <IO/SeekableReadBuffer.h>
@@ -30,7 +32,9 @@ private:
     String bucket;
     String key;
     UInt64 max_single_read_retries;
 
     off_t offset = 0;
+    off_t read_until_position = 0;
+
     Aws::S3::Model::GetObjectResult read_result;
     std::unique_ptr<ReadBuffer> impl;
@@ -45,6 +49,7 @@ public:
         UInt64 max_single_read_retries_,
         const ReadSettings & settings_,
         bool use_external_buffer = false,
+        size_t offset_ = 0,
         size_t read_until_position_ = 0,
         bool restricted_seek_ = false);
 
@@ -58,7 +63,7 @@ public:
 
     void setReadUntilPosition(size_t position) override;
 
-    Range getRemainingReadRange() const override { return Range{ .left = static_cast<size_t>(offset), .right = read_until_position }; }
+    Range getRemainingReadRange() const override;
 
     size_t getFileOffsetOfBufferEnd() const override { return offset; }
 
@@ -69,13 +74,55 @@ private:
 
     bool use_external_buffer;
 
-    off_t read_until_position = 0;
-
     /// There is different seek policy for disk seek and for non-disk seek
     /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet).
     bool restricted_seek;
 };
 
+/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object
+class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory
+{
+public:
+    explicit ReadBufferS3Factory(
+        std::shared_ptr<Aws::S3::S3Client> client_ptr_,
+        const String & bucket_,
+        const String & key_,
+        size_t range_step_,
+        size_t object_size_,
+        UInt64 s3_max_single_read_retries_,
+        const ReadSettings & read_settings_)
+        : client_ptr(client_ptr_)
+        , bucket(bucket_)
+        , key(key_)
+        , read_settings(read_settings_)
+        , range_generator(object_size_, range_step_)
+        , range_step(range_step_)
+        , object_size(object_size_)
+        , s3_max_single_read_retries(s3_max_single_read_retries_)
+    {
+        assert(range_step > 0);
+        assert(range_step < object_size);
+    }
+
+    SeekableReadBufferPtr getReader() override;
+
+    off_t seek(off_t off, [[maybe_unused]] int whence) override;
+
+    std::optional<size_t> getTotalSize() override;
+
+private:
+    std::shared_ptr<Aws::S3::S3Client> client_ptr;
+    const String bucket;
+    const String key;
+    ReadSettings read_settings;
+
+    RangeGenerator range_generator;
+    size_t range_step;
+    size_t object_size;
+
+    UInt64 s3_max_single_read_retries;
+};
+
 }
 
 #endif
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <functional>
+#include <Common/RangeGenerator.h>
 #include <IO/ConnectionTimeouts.h>
 #include <IO/HTTPCommon.h>
 #include <IO/ParallelReadBuffer.h>
@@ -635,43 +636,6 @@ public:
     void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
 };
 
-class RangeGenerator
-{
-public:
-    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
-        : from(range_start), range_step(range_step_), total_size(total_size_)
-    {
-    }
-
-    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
-
-    using Range = std::pair<size_t, size_t>;
-
-    // return upper exclusive range of values, i.e. [from_range, to_range>
-    std::optional<Range> nextRange()
-    {
-        if (from >= total_size)
-        {
-            return std::nullopt;
-        }
-
-        auto to = from + range_step;
-        if (to >= total_size)
-        {
-            to = total_size;
-        }
-
-        Range range{from, to};
-        from = to;
-        return range;
-    }
-
-private:
-    size_t from;
-    size_t range_step;
-    size_t total_size;
-};
-
 class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
 {
     using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
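The RangeGenerator removed from this header is not deleted from the codebase: judging by the `#include <Common/RangeGenerator.h>` lines added in both this file and ReadBufferFromS3.h, it moves to a shared header so the HTTP reader and the new S3 range factory can both use it. Reproduced below as a standalone runnable snippet to show the range semantics (upper-exclusive ranges, with the last one clipped to the object size), which is what drives ReadBufferS3Factory::getReader returning nullptr at end of object:

    #include <cmath>
    #include <cstdio>
    #include <optional>
    #include <utility>

    class RangeGenerator
    {
    public:
        explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
            : from(range_start), range_step(range_step_), total_size(total_size_) {}

        using Range = std::pair<size_t, size_t>;

        // upper-exclusive range: [from_range, to_range)
        std::optional<Range> nextRange()
        {
            if (from >= total_size)
                return std::nullopt; // exhausted: the factory maps this to nullptr
            size_t to = from + range_step;
            if (to >= total_size)
                to = total_size;
            Range range{from, to};
            from = to;
            return range;
        }

    private:
        size_t from;
        size_t range_step;
        size_t total_size;
    };

    int main()
    {
        RangeGenerator gen(10, 4);
        while (auto r = gen.nextRange())
            std::printf("[%zu, %zu)\n", r->first, r->second); // [0,4) [4,8) [8,10)
    }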
@@ -24,6 +24,7 @@
 #    include <aws/core/utils/UUID.h>
 #    include <aws/core/http/HttpClientFactory.h>
 #    include <aws/s3/S3Client.h>
+#    include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE
 
 #    include <IO/S3/PocoHTTPClientFactory.h>
 #    include <IO/S3/PocoHTTPClient.h>
@@ -682,6 +683,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
+    extern const int S3_ERROR;
 }
 
 namespace S3
@@ -839,6 +841,26 @@ namespace S3
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
                 quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
     }
+
+    size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error)
+    {
+        Aws::S3::Model::HeadObjectRequest req;
+        req.SetBucket(bucket);
+        req.SetKey(key);
+
+        Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
+
+        if (outcome.IsSuccess())
+        {
+            auto read_result = outcome.GetResultWithOwnership();
+            return static_cast<size_t>(read_result.GetContentLength());
+        }
+        else if (throw_on_error)
+        {
+            throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+        }
+        return 0;
+    }
 }
 
 }
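A usage note on the new helper: with throw_on_error = false (as ReadBufferFromS3::getTotalSize calls it above), a failed HeadObject comes back as size 0 rather than an exception, so callers must treat 0 as "unknown". The control-flow shape, as a runnable sketch with a stand-in for Aws::S3::Model::HeadObjectOutcome:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Stand-in for the AWS SDK outcome type used by S3::getObjectSize.
    struct HeadOutcome
    {
        bool success;
        long long content_length;
        std::string error;
    };

    size_t getObjectSize(const HeadOutcome & outcome, bool throw_on_error = true)
    {
        if (outcome.success)
            return static_cast<size_t>(outcome.content_length); // ContentLength from HeadObject
        if (throw_on_error)
            throw std::runtime_error(outcome.error);
        return 0; // "unknown" fallback when errors are suppressed
    }

    int main()
    {
        std::cout << getObjectSize({true, 1024, ""}) << '\n';               // 1024
        std::cout << getObjectSize({false, 0, "NoSuchKey"}, false) << '\n'; // 0, no throw
    }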
@@ -75,6 +75,8 @@ struct URI
     static void validateBucket(const String & bucket, const Poco::URI & uri);
 };
 
+size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error = true);
+
 }
 
 #endif
@@ -1,4 +1,6 @@
+#include <algorithm>
 #include <future>
+#include <numeric>
 #include <Poco/Util/Application.h>
 
 #include <base/sort.h>
@@ -15,6 +17,7 @@
 #include <IO/WriteBufferFromFile.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <Interpreters/Aggregator.h>
+#include <Common/LRUCache.h>
 #include <Common/MemoryTracker.h>
 #include <Common/CurrentThread.h>
 #include <Common/typeid_cast.h>
@@ -27,12 +30,236 @@
 #include <Interpreters/JIT/CompiledExpressionCache.h>
 #include <Core/ProtocolDefines.h>
 
+#include <Parsers/ASTSelectQuery.h>
+
 namespace ProfileEvents
 {
     extern const Event ExternalAggregationWritePart;
     extern const Event ExternalAggregationCompressedBytes;
     extern const Event ExternalAggregationUncompressedBytes;
+    extern const Event AggregationPreallocatedElementsInHashTables;
+    extern const Event AggregationHashTablesInitializedAsTwoLevel;
+}
+
+namespace
+{
+/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes.
+  */
+class HashTablesStatistics
+{
+public:
+    struct Entry
+    {
+        size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning
+        size_t median_size; // roughly the size we're going to preallocate on each thread
+    };
+
+    using Cache = DB::LRUCache<UInt64, Entry>;
+    using CachePtr = std::shared_ptr<Cache>;
+    using Params = DB::Aggregator::Params::StatsCollectingParams;
+
+    /// Collection and use of the statistics should be enabled.
+    std::optional<Entry> getSizeHint(const Params & params)
+    {
+        if (!params.isCollectionAndUseEnabled())
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+        std::lock_guard lock(mutex);
+        const auto cache = getHashTableStatsCache(params, lock);
+        if (const auto hint = cache->get(params.key))
+        {
+            LOG_DEBUG(
+                &Poco::Logger::get("Aggregator"),
+                "An entry for key={} found in cache: sum_of_sizes={}, median_size={}",
+                params.key,
+                hint->sum_of_sizes,
+                hint->median_size);
+            return *hint;
+        }
+        return std::nullopt;
+    }
+
+    /// Collection and use of the statistics should be enabled.
+    void update(size_t sum_of_sizes, size_t median_size, const Params & params)
+    {
+        if (!params.isCollectionAndUseEnabled())
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+        std::lock_guard lock(mutex);
+        const auto cache = getHashTableStatsCache(params, lock);
+        const auto hint = cache->get(params.key);
+        // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong.
+        if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2
+            || hint->median_size < median_size)
+        {
+            LOG_DEBUG(
+                &Poco::Logger::get("Aggregator"),
+                "Statistics updated for key={}: new sum_of_sizes={}, median_size={}",
+                params.key,
+                sum_of_sizes,
+                median_size);
+            cache->set(params.key, std::make_shared<Entry>(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size}));
+        }
+    }
+
+    std::optional<DB::HashTablesCacheStatistics> getCacheStats() const
+    {
+        std::lock_guard lock(mutex);
+        if (hash_table_stats)
+        {
+            size_t hits = 0, misses = 0;
+            hash_table_stats->getStats(hits, misses);
+            return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses};
+        }
+        return std::nullopt;
+    }
+
+    static size_t calculateCacheKey(const DB::ASTPtr & select_query)
+    {
+        if (!select_query)
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null");
+
+        const auto & select = select_query->as<DB::ASTSelectQuery &>();
+
+        // It may happen in some corner cases like `select 1 as num group by num`.
+        if (!select.tables())
+            return 0;
+
+        SipHash hash;
+        hash.update(select.tables()->getTreeHash());
+        if (const auto where = select.where())
+            hash.update(where->getTreeHash());
+        if (const auto group_by = select.groupBy())
+            hash.update(group_by->getTreeHash());
+        return hash.get64();
+    }
+
+private:
+    CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
+    {
+        if (!hash_table_stats || hash_table_stats->maxSize() != params.max_entries_for_hash_table_stats)
+            hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats);
+        return hash_table_stats;
+    }
+
+    mutable std::mutex mutex;
+    CachePtr hash_table_stats;
+};
+
+HashTablesStatistics & getHashTablesStatistics()
+{
+    static HashTablesStatistics hash_tables_stats;
+    return hash_tables_stats;
+}
+
+bool worthConvertToTwoLevel(
+    size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes)
+{
+    // params.group_by_two_level_threshold will be equal to 0 if we have only one thread to execute aggregation (refer to AggregatingStep::transformPipeline).
+    return (group_by_two_level_threshold && result_size >= group_by_two_level_threshold)
+        || (group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(group_by_two_level_threshold_bytes));
+}
+
+DB::AggregatedDataVariants::Type convertToTwoLevelTypeIfPossible(DB::AggregatedDataVariants::Type type)
+{
+    using Type = DB::AggregatedDataVariants::Type;
+    switch (type)
+    {
+#define M(NAME) \
+    case Type::NAME: \
+        return Type::NAME##_two_level;
+        APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
+#undef M
+        default:
+            return type;
+    }
+    __builtin_unreachable();
+}
+
+void initDataVariantsWithSizeHint(
+    DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params)
+{
+    const auto & stats_collecting_params = params.stats_collecting_params;
+    if (stats_collecting_params.isCollectionAndUseEnabled())
+    {
+        if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params))
+        {
+            const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
+            const auto lower_limit = hint->sum_of_sizes / max_threads;
+            const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads;
+            const auto adjusted = std::min(std::max(lower_limit, hint->median_size), upper_limit);
+            if (worthConvertToTwoLevel(
+                    params.group_by_two_level_threshold,
+                    hint->sum_of_sizes,
+                    /*group_by_two_level_threshold_bytes*/ 0,
+                    /*result_size_bytes*/ 0))
+                method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
+            result.init(method_chosen, adjusted);
+            ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
+            return;
+        }
+    }
+    result.init(method_chosen);
+}
+
+/// Collection and use of the statistics should be enabled.
+void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params)
+{
+    if (!params.isCollectionAndUseEnabled())
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+    std::vector<size_t> sizes(data_variants.size());
+    for (size_t i = 0; i < data_variants.size(); ++i)
+        sizes[i] = data_variants[i]->size();
+    const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though...
+    std::nth_element(sizes.begin(), median_size, sizes.end());
+    const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);
+    getHashTablesStatistics().update(sum_of_sizes, *median_size, params);
+}
+
+// The std::is_constructible trait isn't suitable here because some classes have template constructors with semantics different from providing size hints.
+// Also string hash table variants are not supported due to the fact that both local perf tests and tests in CI showed slowdowns for them.
+template <typename...>
+struct HasConstructorOfNumberOfElements : std::false_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<HashMapTable<Ts...>> : std::true_type
+{
+};
+
+template <typename Key, typename Cell, typename Hash, typename Grower, typename Allocator, template <typename...> typename ImplTable>
+struct HasConstructorOfNumberOfElements<TwoLevelHashMapTable<Key, Cell, Hash, Grower, Allocator, ImplTable>> : std::true_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<HashTable<Ts...>> : std::true_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<TwoLevelHashTable<Ts...>> : std::true_type
+{
+};
+
+template <template <typename> typename Method, typename Base>
+struct HasConstructorOfNumberOfElements<Method<Base>> : HasConstructorOfNumberOfElements<Base>
+{
+};
+
+template <typename Method>
+auto constructWithReserveIfPossible(size_t size_hint)
+{
+    if constexpr (HasConstructorOfNumberOfElements<typename Method::Data>::value)
+    {
+        ProfileEvents::increment(ProfileEvents::AggregationPreallocatedElementsInHashTables, size_hint);
+        return std::make_unique<Method>(size_hint);
+    }
+    else
+        return std::make_unique<Method>();
+}
 }
 
 namespace DB
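Two details in the added block above are easy to miss. First, the median is taken with std::nth_element, which only partially sorts; the "not precisely though..." comment refers to even-sized inputs, where this picks the upper-middle element rather than a true median. Second, the cached entry behaves like a running maximum: it is rewritten when an observation exceeds the stored value, or when it drops below half of it (the prediction became "too wrong"). A standalone check of the median step:

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main()
    {
        // Mirrors updateStatistics: per-thread hash table sizes -> median + sum.
        std::vector<size_t> sizes{40, 10, 30, 20};

        const auto median_it = sizes.begin() + sizes.size() / 2; // upper middle for even n
        std::nth_element(sizes.begin(), median_it, sizes.end()); // partial sort only

        const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);

        std::cout << "median=" << *median_it << " sum=" << sum_of_sizes << '\n'; // median=30 sum=100
    }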
@@ -64,6 +291,10 @@ AggregatedDataVariants::~AggregatedDataVariants()
     }
 }
 
+std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics()
+{
+    return getHashTablesStatistics().getCacheStats();
+}
+
 void AggregatedDataVariants::convertToTwoLevel()
 {
@@ -88,6 +319,47 @@ void AggregatedDataVariants::convertToTwoLevel()
     }
 }
 
+void AggregatedDataVariants::init(Type type_, std::optional<size_t> size_hint)
+{
+    switch (type_)
+    {
+        case Type::EMPTY:
+            break;
+        case Type::without_key:
+            break;
+
+#define M(NAME, IS_TWO_LEVEL) \
+    case Type::NAME: \
+        if (size_hint) \
+            (NAME) = constructWithReserveIfPossible<decltype(NAME)::element_type>(*size_hint); \
+        else \
+            (NAME) = std::make_unique<decltype(NAME)::element_type>(); \
+        break;
+        APPLY_FOR_AGGREGATED_VARIANTS(M)
+#undef M
+    }
+
+    type = type_;
+}
+
+Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default;
+
+Aggregator::Params::StatsCollectingParams::StatsCollectingParams(
+    const ASTPtr & select_query_,
+    bool collect_hash_table_stats_during_aggregation_,
+    size_t max_entries_for_hash_table_stats_,
+    size_t max_size_to_preallocate_for_aggregation_)
+    : key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0)
+    , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_)
+    , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_)
+{
+}
+
+bool Aggregator::Params::StatsCollectingParams::isCollectionAndUseEnabled() const
+{
+    return key != 0;
+}
+
 Block Aggregator::getHeader(bool final) const
 {
     return params.getHeader(final);
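The trait-plus-if-constexpr dispatch behind constructWithReserveIfPossible (called from the size-hinted init above) is a reusable pattern: specific template instantiations opt in to a "number of elements" constructor, and everything else falls back to default construction. A self-contained sketch of the same mechanism, with hypothetical stand-in types:

    #include <iostream>
    #include <memory>
    #include <type_traits>
    #include <vector>

    // Stand-ins for the aggregation method data types.
    struct NoReserve { NoReserve() = default; };
    struct WithReserve
    {
        WithReserve() = default;
        explicit WithReserve(size_t size_hint) { storage.reserve(size_hint); }
        std::vector<int> storage;
    };

    // Opt-in trait, mirroring HasConstructorOfNumberOfElements: specializations
    // declare which types accept a size-hint constructor.
    template <typename> struct HasSizeHintCtor : std::false_type {};
    template <> struct HasSizeHintCtor<WithReserve> : std::true_type {};

    template <typename T>
    std::unique_ptr<T> constructWithReserveIfPossible(size_t size_hint)
    {
        if constexpr (HasSizeHintCtor<T>::value)
            return std::make_unique<T>(size_hint); // preallocates up front
        else
            return std::make_unique<T>();          // default construction
    }

    int main()
    {
        auto a = constructWithReserveIfPossible<WithReserve>(1024);
        auto b = constructWithReserveIfPossible<NoReserve>(1024);
        std::cout << a->storage.capacity() << '\n'; // >= 1024
        (void)b;
    }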
@@ -237,8 +509,7 @@ public:
 
 #endif
 
-Aggregator::Aggregator(const Params & params_)
-    : params(params_)
+Aggregator::Aggregator(const Params & params_) : params(params_)
 {
     /// Use query-level memory tracker
     if (auto * memory_tracker_child = CurrentThread::getMemoryTracker())
@@ -292,7 +563,6 @@ Aggregator::Aggregator(const Params & params_)
 #if USE_EMBEDDED_COMPILER
     compileAggregateFunctionsIfNeeded();
 #endif
-
 }
 
 #if USE_EMBEDDED_COMPILER
@@ -958,7 +1228,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
     /// How to perform the aggregation?
     if (result.empty())
     {
-        result.init(method_chosen);
+        initDataVariantsWithSizeHint(result, method_chosen, params);
         result.keys_size = params.keys_size;
         result.key_sizes = key_sizes;
         LOG_TRACE(log, "Aggregation method: {}", result.getMethodName());
@@ -1038,9 +1308,8 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
     /// Here all the results in the sum are taken into account, from different threads.
     auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
 
-    bool worth_convert_to_two_level
-        = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
-        || (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
+    bool worth_convert_to_two_level = worthConvertToTwoLevel(
+        params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
 
     /** Converting to a two-level data structure.
       * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
@@ -1327,10 +1596,7 @@ void Aggregator::convertToBlockImpl(
 
 
 template <typename Mapped>
-inline void Aggregator::insertAggregatesIntoColumns(
-    Mapped & mapped,
-    MutableColumns & final_aggregate_columns,
-    Arena * arena) const
+inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColumns & final_aggregate_columns, Arena * arena) const
 {
     /** Final values of aggregate functions are inserted to columns.
       * Then states of aggregate functions, that are not longer needed, are destroyed.
@@ -2179,6 +2445,9 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData
 
     LOG_TRACE(log, "Merging aggregated data");
 
+    if (params.stats_collecting_params.isCollectionAndUseEnabled())
+        updateStatistics(data_variants, params.stats_collecting_params);
+
     ManyAggregatedDataVariants non_empty_data;
     non_empty_data.reserve(data_variants.size());
     for (auto & data : data_variants)
@@ -2388,9 +2657,8 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool
     /// Here all the results in the sum are taken into account, from different threads.
     auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
 
-    bool worth_convert_to_two_level
-        = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
-        || (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
+    bool worth_convert_to_two_level = worthConvertToTwoLevel(
+        params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
 
     /** Converting to a two-level data structure.
       * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
@@ -34,6 +34,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnLowCardinality.h>
 
+#include <Parsers/IAST_fwd.h>
 
 namespace DB
 {
@@ -129,6 +130,7 @@ private:
 template <typename Base>
 struct AggregationDataWithNullKeyTwoLevel : public Base
 {
+    using Base::Base;
     using Base::impls;
 
     AggregationDataWithNullKeyTwoLevel() = default;
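The `using Base::Base;` added above is what makes the new size-hint constructors (introduced in the hunks below) propagate through the wrapper: inheriting constructors pulls the base overload set into the derived type without restating it. A minimal illustration:

    #include <cstddef>
    #include <iostream>

    struct Base
    {
        Base() = default;
        explicit Base(std::size_t size_hint) : hint(size_hint) {}
        std::size_t hint = 0;
    };

    // Like AggregationDataWithNullKeyTwoLevel: the wrapper keeps the base's
    // size-hint constructor only because it inherits the constructors.
    struct Wrapper : Base
    {
        using Base::Base;
        Wrapper() = default;
    };

    int main()
    {
        Wrapper w(128); // resolves to the inherited Base(std::size_t)
        std::cout << w.hint << '\n'; // 128
    }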
@ -183,6 +185,8 @@ struct AggregationMethodOneNumber
|
|||||||
|
|
||||||
AggregationMethodOneNumber() = default;
|
AggregationMethodOneNumber() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodOneNumber(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodOneNumber(const Other & other) : data(other.data)
|
explicit AggregationMethodOneNumber(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -225,6 +229,8 @@ struct AggregationMethodString
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
explicit AggregationMethodString(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
||||||
|
|
||||||
static const bool low_cardinality_optimization = false;
|
static const bool low_cardinality_optimization = false;
|
||||||
@ -250,6 +256,8 @@ struct AggregationMethodStringNoCache
|
|||||||
|
|
||||||
AggregationMethodStringNoCache() = default;
|
AggregationMethodStringNoCache() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodStringNoCache(const Other & other) : data(other.data)
|
explicit AggregationMethodStringNoCache(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -280,6 +288,8 @@ struct AggregationMethodFixedString
|
|||||||
|
|
||||||
AggregationMethodFixedString() = default;
|
AggregationMethodFixedString() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodFixedString(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodFixedString(const Other & other) : data(other.data)
|
explicit AggregationMethodFixedString(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -309,6 +319,8 @@ struct AggregationMethodFixedStringNoCache
|
|||||||
|
|
||||||
AggregationMethodFixedStringNoCache() = default;
|
AggregationMethodFixedStringNoCache() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodFixedStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data)
|
explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -382,6 +394,8 @@ struct AggregationMethodKeysFixed
|
|||||||
|
|
||||||
AggregationMethodKeysFixed() = default;
|
AggregationMethodKeysFixed() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodKeysFixed(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodKeysFixed(const Other & other) : data(other.data)
|
explicit AggregationMethodKeysFixed(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -473,6 +487,8 @@ struct AggregationMethodSerialized
|
|||||||
|
|
||||||
AggregationMethodSerialized() = default;
|
AggregationMethodSerialized() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodSerialized(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodSerialized(const Other & other) : data(other.data)
|
explicit AggregationMethodSerialized(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -652,21 +668,7 @@ struct AggregatedDataVariants : private boost::noncopyable
|
|||||||
|
|
||||||
~AggregatedDataVariants();
|
~AggregatedDataVariants();
|
||||||
|
|
||||||
void init(Type type_)
|
void init(Type type_, std::optional<size_t> size_hint = std::nullopt);
|
||||||
{
|
|
||||||
switch (type_)
|
|
||||||
{
|
|
||||||
case Type::EMPTY: break;
|
|
||||||
case Type::without_key: break;
|
|
||||||
|
|
||||||
#define M(NAME, IS_TWO_LEVEL) \
|
|
||||||
case Type::NAME: (NAME) = std::make_unique<decltype(NAME)::element_type>(); break;
|
|
||||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
|
||||||
#undef M
|
|
||||||
}
|
|
||||||
|
|
||||||
type = type_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Number of rows (different keys).
|
/// Number of rows (different keys).
|
||||||
size_t size() const
|
size_t size() const
|
||||||
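The init() body moves out of the header and gains an optional size hint, so each variant's hash table can be preallocated. A minimal sketch of what the out-of-line definition could look like, assuming the hint simply forwards to the new single-argument constructors added above (illustrative only, not the commit's actual Aggregator.cpp code):

    void AggregatedDataVariants::init(Type type_, std::optional<size_t> size_hint)
    {
        switch (type_)
        {
            case Type::EMPTY:
            case Type::without_key:
                break;

        #define M(NAME, IS_TWO_LEVEL) \
            case Type::NAME: \
                if (size_hint) /* preallocate buckets using the remembered size */ \
                    (NAME) = std::make_unique<decltype(NAME)::element_type>(*size_hint); \
                else \
                    (NAME) = std::make_unique<decltype(NAME)::element_type>(); \
                break;
            APPLY_FOR_AGGREGATED_VARIANTS(M)
        #undef M
        }

        type = type_;
    }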
@@ -929,29 +931,61 @@ public:
     bool compile_aggregate_expressions;
     size_t min_count_to_compile_aggregate_expression;
 
+    struct StatsCollectingParams
+    {
+        StatsCollectingParams();
+
+        StatsCollectingParams(
+            const ASTPtr & select_query_,
+            bool collect_hash_table_stats_during_aggregation_,
+            size_t max_entries_for_hash_table_stats_,
+            size_t max_size_to_preallocate_for_aggregation_);
+
+        bool isCollectionAndUseEnabled() const;
+
+        const UInt64 key = 0;
+        const size_t max_entries_for_hash_table_stats = 0;
+        const size_t max_size_to_preallocate_for_aggregation = 0;
+    };
+    StatsCollectingParams stats_collecting_params;
+
     Params(
         const Block & src_header_,
-        const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
-        bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
-        size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
+        const ColumnNumbers & keys_,
+        const AggregateDescriptions & aggregates_,
+        bool overflow_row_,
+        size_t max_rows_to_group_by_,
+        OverflowMode group_by_overflow_mode_,
+        size_t group_by_two_level_threshold_,
+        size_t group_by_two_level_threshold_bytes_,
         size_t max_bytes_before_external_group_by_,
         bool empty_result_for_aggregation_by_empty_set_,
-        VolumePtr tmp_volume_, size_t max_threads_,
+        VolumePtr tmp_volume_,
+        size_t max_threads_,
         size_t min_free_disk_space_,
         bool compile_aggregate_expressions_,
         size_t min_count_to_compile_aggregate_expression_,
-        const Block & intermediate_header_ = {})
-        : src_header(src_header_),
-        intermediate_header(intermediate_header_),
-        keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
-        overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
-        group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
-        max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
-        empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
-        tmp_volume(tmp_volume_), max_threads(max_threads_),
-        min_free_disk_space(min_free_disk_space_),
-        compile_aggregate_expressions(compile_aggregate_expressions_),
-        min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
+        const Block & intermediate_header_ = {},
+        const StatsCollectingParams & stats_collecting_params_ = {})
+        : src_header(src_header_)
+        , intermediate_header(intermediate_header_)
+        , keys(keys_)
+        , aggregates(aggregates_)
+        , keys_size(keys.size())
+        , aggregates_size(aggregates.size())
+        , overflow_row(overflow_row_)
+        , max_rows_to_group_by(max_rows_to_group_by_)
+        , group_by_overflow_mode(group_by_overflow_mode_)
+        , group_by_two_level_threshold(group_by_two_level_threshold_)
+        , group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_)
+        , max_bytes_before_external_group_by(max_bytes_before_external_group_by_)
+        , empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_)
+        , tmp_volume(tmp_volume_)
+        , max_threads(max_threads_)
+        , min_free_disk_space(min_free_disk_space_)
+        , compile_aggregate_expressions(compile_aggregate_expressions_)
+        , min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
+        , stats_collecting_params(stats_collecting_params_)
     {
     }
 
@@ -1350,4 +1384,13 @@ APPLY_FOR_AGGREGATED_VARIANTS(M)
 
 #undef M
 
+
+struct HashTablesCacheStatistics
+{
+    size_t entries = 0;
+    size_t hits = 0;
+    size_t misses = 0;
+};
+
+std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics();
 }
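Together, these declarations set up a feedback loop: after a query finishes, the observed hash table size is remembered under a key derived from the query, the next run of the same query can pass that size into init() as a preallocation hint, and entries/hits/misses are exposed for monitoring. A minimal self-contained sketch of such a cache, with illustrative names (the commit's real implementation sits behind getHashTablesCacheStatistics() and is not shown here):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>
    #include <optional>
    #include <unordered_map>

    struct Stats { size_t entries = 0; size_t hits = 0; size_t misses = 0; };

    /// Illustrative query-keyed size cache; not the commit's actual class.
    class HashTableSizeCache
    {
    public:
        std::optional<size_t> getSizeHint(uint64_t query_key)
        {
            std::lock_guard lock(mutex);
            auto it = sizes.find(query_key);
            if (it == sizes.end())
            {
                ++stats.misses;
                return std::nullopt;
            }
            ++stats.hits;
            return it->second;
        }

        void update(uint64_t query_key, size_t observed_size)
        {
            std::lock_guard lock(mutex);
            sizes[query_key] = observed_size;
            stats.entries = sizes.size();
        }

        Stats getStats()
        {
            std::lock_guard lock(mutex);
            return stats;
        }

    private:
        std::mutex mutex;
        std::unordered_map<uint64_t, size_t> sizes;
        Stats stats;
    };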
@@ -1,3 +1,4 @@
+#include <Interpreters/Aggregator.h>
 #include <Interpreters/AsynchronousMetrics.h>
 #include <Interpreters/AsynchronousMetricLog.h>
 #include <Interpreters/JIT/CompiledExpressionCache.h>
@@ -630,6 +631,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
 
     new_values["Uptime"] = getContext()->getUptimeSeconds();
 
+    {
+        if (const auto stats = getHashTablesCacheStatistics())
+        {
+            new_values["HashTableStatsCacheEntries"] = stats->entries;
+            new_values["HashTableStatsCacheHits"] = stats->hits;
+            new_values["HashTableStatsCacheMisses"] = stats->misses;
+        }
+    }
+
     /// Process process memory usage according to OS
 #if defined(OS_LINUX) || defined(OS_FREEBSD)
     {
@@ -169,6 +169,7 @@ public:
         if (columns.size() != float_features_count + cat_features_count)
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
+                columns.size(),
                 float_features_count,
                 cat_features_count);
 
@@ -233,7 +233,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     {
         assert(!db_and_table.first && !db_and_table.second);
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+            exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
         return {};
     }
 
@@ -263,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
         /// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
         /// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
         if (exception)
-            exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
+            exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED);
         return {};
     }
 
@@ -274,7 +274,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     if (databases.end() == it)
     {
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
+            exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE);
         return {};
     }
     database = it->second;
@@ -282,7 +282,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
 
     auto table = database->tryGetTable(table_id.table_name, context_);
     if (!table && exception)
-        exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+        exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
     if (!table)
         database = nullptr;
 
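Each call site here switches from the variadic exception constructor (error code first, then a format string and its arguments) to pre-formatting the message with fmt::format and passing the code second. The two shapes, shown with a simplified stand-in type (illustrative only, not DB::Exception; 60 is a placeholder code value, and fmt 8+ is assumed):

    #include <fmt/format.h>
    #include <optional>
    #include <string>
    #include <utility>

    /// Simplified stand-in for the exception type, for illustration only.
    struct Exception
    {
        /// Variadic shape: code first, then format string and arguments.
        template <typename... Args>
        Exception(int code_, fmt::format_string<Args...> format, Args &&... args)
            : code(code_), message(fmt::format(format, std::forward<Args>(args)...)) {}

        /// Preformatted shape: message first, code second.
        Exception(std::string message_, int code_) : code(code_), message(std::move(message_)) {}

        int code;
        std::string message;
    };

    int main()
    {
        std::optional<Exception> exception;
        const int UNKNOWN_TABLE = 60; /// placeholder value

        /// Old call shape:
        exception.emplace(UNKNOWN_TABLE, "Table {} doesn't exist", "db.t");

        /// New call shape, as in this diff:
        exception.emplace(fmt::format("Table {} doesn't exist", "db.t"), UNKNOWN_TABLE);
    }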
@@ -2082,6 +2082,12 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
 
     const Settings & settings = context->getSettingsRef();
 
+    const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams(
+        query_ptr,
+        settings.collect_hash_table_stats_during_aggregation,
+        settings.max_entries_for_hash_table_stats,
+        settings.max_size_to_preallocate_for_aggregation);
+
     Aggregator::Params params(
         header_before_aggregation,
         keys,
@@ -2099,7 +2105,9 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
         settings.max_threads,
         settings.min_free_disk_space_for_temporary_data,
         settings.compile_aggregate_expressions,
-        settings.min_count_to_compile_aggregate_expression);
+        settings.min_count_to_compile_aggregate_expression,
+        Block{},
+        stats_collecting_params);
 
     SortDescription group_by_sort_description;
 
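The explicit Block{} here is forced by parameter order: stats_collecting_params_ was appended after the defaulted intermediate_header_ parameter, and C++ only allows skipping defaulted arguments from the right. A small self-contained illustration of that rule (names here are invented for the example):

    #include <iostream>
    #include <string>

    // Defaulted parameters can only be omitted from the right: to pass `extra`
    // you must also pass something for `header`, even if it is just {}.
    void configure(int threads, const std::string & header = {}, int extra = 0)
    {
        std::cout << threads << " '" << header << "' " << extra << '\n';
    }

    int main()
    {
        configure(8);         // header = "", extra = 0
        configure(8, {}, 42); // must spell out {} to reach `extra`
    }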
@@ -320,12 +320,13 @@ Chunk DDLQueryStatusSource::generate()
                 if (throw_on_timeout)
                 {
                     if (!first_exception)
-                        first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
-                            node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+                        first_exception = std::make_unique<Exception>(
+                            fmt::format(msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts),
+                            ErrorCodes::TIMEOUT_EXCEEDED);
                     return {};
                 }
 
-                LOG_INFO(log, fmt::runtime(msg_format), node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+                LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
 
                 NameSet unfinished_hosts = waiting_hosts;
                 for (const auto & host_id : finished_hosts)
@@ -358,9 +359,12 @@ Chunk DDLQueryStatusSource::generate()
             /// Paradoxically, this exception will be throw even in case of "never_throw" mode.
 
             if (!first_exception)
-                first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
-                    "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
-                    " since it was finished (or its lifetime is expired)", node_path);
+                first_exception = std::make_unique<Exception>(
+                    fmt::format(
+                        "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
+                        " since it was finished (or its lifetime is expired)",
+                        node_path),
+                    ErrorCodes::UNFINISHED);
             return {};
         }
 
@@ -386,7 +390,8 @@ Chunk DDLQueryStatusSource::generate()
         if (status.code != 0 && !first_exception
             && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
         {
-            first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
+            first_exception = std::make_unique<Exception>(
+                fmt::format("There was an error on [{}:{}]: {}", host, port, status.message), status.code);
        }
 
        ++num_hosts_finished;
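The LOG_INFO line also drops the fmt::runtime() wrapper around msg_format. In fmt 8 and later, a format string that is not a compile-time constant must be wrapped in fmt::runtime() to opt out of compile-time checking, so whether the wrapper is required depends on the fmt version and the logging macro in use; this small illustration assumes plain fmt 8+:

    #include <fmt/format.h>
    #include <string>

    int main()
    {
        std::string msg_format = "Watching task {} is executing longer than {} seconds";

        // A literal format string is checked at compile time:
        fmt::print("Watching task {} is executing longer than {} seconds\n", "query-0000000001", 30);

        // A runtime string needs an explicit opt-out in fmt 8+:
        fmt::print(fmt::runtime(msg_format + "\n"), "query-0000000001", 30);
    }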
@@ -359,7 +359,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
         return true;
     }
 
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {:x}", type);
 }
 
 void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
@@ -498,7 +498,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
             msgpack::object_ext object_ext = object.via.ext;
             if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType))
                 return std::make_shared<DataTypeUUID>();
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type());
         }
     }
     __builtin_unreachable();
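Both fixes replace {%x}, which mixes printf's %x into a fmt replacement field, with the correct fmt spec {:x}. A quick illustration:

    #include <fmt/format.h>

    int main()
    {
        // fmt writes hex with "{:x}", not printf's "%x":
        fmt::print("Unsupported MsgPack extension type: {:x}\n", 0x2a); // prints "... type: 2a"

        // "{%x}" is not a valid replacement field; with a runtime-checked
        // format string it raises fmt::format_error instead of printing.
    }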
@@ -1,10 +1,13 @@
+// Needs to go first because its partial specialization of fmt::formatter
+// should be defined before any instantiation
+#include <fmt/ostream.h>
+
 #include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
 
 #include <base/logger_useful.h>
 
 #include <cppkafka/cppkafka.h>
 #include <boost/algorithm/string/join.hpp>
-#include <fmt/ostream.h>
 #include <algorithm>
 
 namespace DB
@@ -1,4 +1,6 @@
 #include <Common/config.h>
+#include "IO/ParallelReadBuffer.h"
+#include "IO/IOThreadPool.h"
 #include "Parsers/ASTCreateQuery.h"
 
 #if USE_AWS_S3
@@ -238,7 +240,8 @@ StorageS3Source::StorageS3Source(
     String compression_hint_,
     const std::shared_ptr<Aws::S3::S3Client> & client_,
     const String & bucket_,
-    std::shared_ptr<IteratorWrapper> file_iterator_)
+    std::shared_ptr<IteratorWrapper> file_iterator_,
+    const size_t download_thread_num_)
     : SourceWithProgress(getHeader(sample_block_, need_path, need_file))
     , WithContext(context_)
     , name(std::move(name_))
@@ -254,6 +257,7 @@ StorageS3Source::StorageS3Source(
     , with_file_column(need_file)
     , with_path_column(need_path)
     , file_iterator(file_iterator_)
+    , download_thread_num(download_thread_num_)
 {
     initialize();
 }
@@ -275,28 +279,79 @@ bool StorageS3Source::initialize()
 
     file_path = fs::path(bucket) / current_key;
 
-    read_buf = wrapReadBufferWithCompressionMethod(
-        std::make_unique<ReadBufferFromS3>(client, bucket, current_key, max_single_read_retries, getContext()->getReadSettings()),
-        chooseCompressionMethod(current_key, compression_hint));
+    read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint));
     auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings);
     QueryPipelineBuilder builder;
     builder.init(Pipe(input_format));
 
     if (columns_desc.hasDefaults())
     {
-        builder.addSimpleTransform([&](const Block & header)
-        {
-            return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext());
-        });
+        builder.addSimpleTransform(
+            [&](const Block & header)
+            { return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext()); });
     }
 
     pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
 
-    initialized = false;
     return true;
 }
 
+std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & key)
+{
+    const size_t object_size = DB::S3::getObjectSize(client, bucket, key, false);
+
+    auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
+    const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
+    const bool object_too_small = object_size < download_thread_num * download_buffer_size;
+    if (!use_parallel_download || object_too_small)
+    {
+        LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size);
+        return std::make_unique<ReadBufferFromS3>(client, bucket, key, max_single_read_retries, getContext()->getReadSettings());
+    }
+
+    assert(object_size > 0);
+
+    if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE)
+    {
+        LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
+        download_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
+    }
+
+    auto factory = std::make_unique<ReadBufferS3Factory>(
+        client, bucket, key, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings());
+    LOG_TRACE(
+        log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size);
+
+    ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
+        ? CurrentThread::get().getThreadGroup()
+        : MainThreadStatus::getInstance().getThreadGroup();
+
+    ContextPtr query_context = CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
+
+    auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
+    {
+        if (has_running_group)
+            thread_status.detachQuery(false);
+    };
+
+    auto worker_setup = [query_context = std::move(query_context),
+                         running_group = std::move(running_group)](ThreadStatus & thread_status)
+    {
+        /// Save query context if any, because cache implementation needs it.
+        if (query_context)
+            thread_status.attachQueryContext(query_context);
+
+        /// To be able to pass ProfileEvents.
+        if (running_group)
+            thread_status.attachQuery(running_group);
+    };
+
+    return std::make_unique<ParallelReadBuffer>(
+        std::move(factory), &IOThreadPool::get(), download_thread_num, std::move(worker_setup), std::move(worker_cleanup));
+}
+
 String StorageS3Source::getName() const
 {
     return name;
@@ -670,6 +725,7 @@ Pipe StorageS3::read(
         block_for_format = storage_snapshot->metadata->getSampleBlock();
     }
 
+    const size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
    for (size_t i = 0; i < num_streams; ++i)
    {
        pipes.emplace_back(std::make_shared<StorageS3Source>(
@@ -686,7 +742,8 @@ Pipe StorageS3::read(
            compression_method,
            client_auth.client,
            client_auth.uri.bucket,
-            iterator_wrapper));
+            iterator_wrapper,
+            max_download_threads));
    }
    auto pipe = Pipe::unitePipes(std::move(pipes));
 
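The heart of createS3ReadBuffer is the up-front choice between a plain single-stream read and a range-split parallel download. A stripped-down sketch of that decision, with ClickHouse types replaced by standard ones (the function name is illustrative):

    #include <cstddef>
    #include <cstdio>

    // Split an object into `buffer_size` ranges downloaded by `threads`
    // workers only when that is actually worth it.
    bool shouldDownloadInParallel(size_t object_size, size_t threads, size_t buffer_size)
    {
        const bool parallel_enabled = buffer_size > 0 && threads > 1;
        // If the object fits in fewer ranges than there are threads, the
        // thread-pool setup cost outweighs any overlap gained.
        const bool object_too_small = object_size < threads * buffer_size;
        return parallel_enabled && !object_too_small;
    }

    int main()
    {
        // 100 MiB object, 4 threads, 10 MiB ranges -> parallel pays off.
        std::printf("%d\n", shouldDownloadInParallel(100 << 20, 4, 10 << 20)); // 1
        // 8 MiB object with the same settings -> single stream.
        std::printf("%d\n", shouldDownloadInParallel(8 << 20, 4, 10 << 20));   // 0
    }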
@@ -74,7 +74,8 @@ public:
         String compression_hint_,
         const std::shared_ptr<Aws::S3::S3Client> & client_,
         const String & bucket,
-        std::shared_ptr<IteratorWrapper> file_iterator_);
+        std::shared_ptr<IteratorWrapper> file_iterator_,
+        size_t download_thread_num);
 
     String getName() const override;
 
@@ -101,13 +102,17 @@ private:
     std::unique_ptr<PullingPipelineExecutor> reader;
     /// onCancel and generate can be called concurrently
     std::mutex reader_mutex;
-    bool initialized = false;
     bool with_file_column = false;
     bool with_path_column = false;
     std::shared_ptr<IteratorWrapper> file_iterator;
+    size_t download_thread_num = 1;
+
+    Poco::Logger * log = &Poco::Logger::get("StorageS3Source");
 
     /// Recreate ReadBuffer and BlockInputStream for each file.
     bool initialize();
+
+    std::unique_ptr<ReadBuffer> createS3ReadBuffer(const String & key);
 };
 
 /**
@@ -54,6 +54,7 @@ def get_packager_cmd(
     build_version: str,
     image_version: str,
     ccache_path: str,
+    official: bool,
 ) -> str:
     package_type = build_config["package_type"]
     comp = build_config["compiler"]
@@ -83,6 +84,9 @@ def get_packager_cmd(
     if _can_export_binaries(build_config):
         cmd += " --with-binaries=tests"
 
+    if official:
+        cmd += " --official"
+
     return cmd
 
 
@@ -254,9 +258,11 @@ def main():
 
     logging.info("Got version from repo %s", version.string)
 
+    official_flag = pr_info.number == 0
     version_type = "testing"
     if "release" in pr_info.labels or "release-lts" in pr_info.labels:
         version_type = "stable"
+        official_flag = True
 
     update_version_local(REPO_COPY, version, version_type)
 
@@ -290,7 +296,9 @@ def main():
         version.string,
         image_version,
         ccache_path,
+        official=official_flag,
     )
 
     logging.info("Going to run packager with %s", packager_cmd)
 
     build_clickhouse_log = os.path.join(TEMP_PATH, "build_log")
@@ -233,7 +233,11 @@ if __name__ == "__main__":
     if ok_builds == 0 or some_builds_are_missing:
         summary_status = "error"
 
-    description = f"{ok_builds}/{total_builds} builds are OK"
+    addition = ""
+    if some_builds_are_missing:
+        addition = "(some builds are missing)"
+
+    description = f"{ok_builds}/{total_builds} builds are OK {addition}"
 
     print("::notice ::Report url: {}".format(url))
 
@@ -244,3 +248,6 @@ if __name__ == "__main__":
         state=summary_status,
         target_url=url,
     )
+
+    if summary_status == "error":
+        sys.exit(1)
@@ -20,8 +20,6 @@ class Description:
 
     def __init__(self, pull_request):
        self.label_name = str()
-        self.legal = False
-
        self._parse(pull_request["bodyText"])
 
    def _parse(self, text):
@@ -39,12 +37,6 @@ class Description:
                 category = stripped
                 next_category = False
 
-            if (
-                stripped
-                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
-            ):
-                self.legal = True
-
             category_headers = (
                 "Category (leave one):",
                 "Changelog category (leave one):",
@@ -15,7 +15,7 @@ class ClickHouseHelper:
         self.url = url
         self.auth = {
             "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
-            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password")
+            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"),
         }
 
     @staticmethod
@@ -59,3 +59,17 @@ def post_commit_status_to_file(file_path, description, state, report_url):
     with open(file_path, "w", encoding="utf-8") as f:
         out = csv.writer(f, delimiter="\t")
         out.writerow([state, report_url, description])
+
+
+def remove_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.remove_from_labels(label)
+
+
+def post_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.add_to_labels(label)
|
|||||||
report_url,
|
report_url,
|
||||||
CHECK_NAME,
|
CHECK_NAME,
|
||||||
)
|
)
|
||||||
|
|
||||||
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
|
||||||
|
if state == "error":
|
||||||
|
sys.exit(1)
|
||||||
|
@@ -7,6 +7,7 @@ import platform
 import shutil
 import subprocess
 import time
+import sys
 from typing import Dict, List, Optional, Set, Tuple, Union
 
 from github import Github
@@ -461,6 +462,9 @@ def main():
     ch_helper = ClickHouseHelper()
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
+    if status == "error":
+        sys.exit(1)
+
 
 if __name__ == "__main__":
     main()
@@ -114,4 +114,7 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if status == "error":
+        sys.exit(1)
@@ -208,7 +208,7 @@ if __name__ == "__main__":
 
     # Refuse other checks to run if fast test failed
     if state != "success":
-        if "force-tests" in pr_info.labels:
+        if "force-tests" in pr_info.labels and state != "error":
             print("'force-tests' enabled, will report success")
         else:
             sys.exit(1)
@@ -279,4 +279,8 @@ if __name__ == "__main__":
         report_url,
         check_name_with_group,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -217,3 +217,6 @@ if __name__ == "__main__":
     post_commit_status(
         gh, pr_info.sha, check_name_with_group, message, status, report_url
     )
+
+    if status == "error":
+        sys.exit(1)
@@ -236,6 +236,15 @@ class PRInfo:
                 return True
         return False
 
+    def has_changes_in_submodules(self):
+        if self.changed_files is None or not self.changed_files:
+            return True
+
+        for f in self.changed_files:
+            if "contrib" in f:
+                return True
+        return False
+
     def can_skip_builds_and_use_version_from_master(self):
         # TODO: See a broken loop
         if "force tests" in self.labels:
@@ -8,7 +8,7 @@ from github import Github
 from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
-from commit_status_helper import get_commit
+from commit_status_helper import get_commit, post_labels, remove_labels
 
 NAME = "Run Check (actions)"
 
@@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
 CAN_BE_TESTED_LABEL = "can be tested"
 DO_NOT_TEST_LABEL = "do not test"
 FORCE_TESTS_LABEL = "force tests"
+SUBMODULE_CHANGED_LABEL = "submodule changed"
 
 # Individual trusted contirbutors who are not in any trusted organization.
 # Can be changed in runtime: we will append users that we learned to be in
@@ -81,6 +82,25 @@ TRUSTED_CONTRIBUTORS = {
     ]
 }
 
+MAP_CATEGORY_TO_LABEL = {
+    "New Feature": "pr-feature",
+    "Bug Fix": "pr-bugfix",
+    "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix",
+    "Improvement": "pr-improvement",
+    "Performance Improvement": "pr-performance",
+    "Backward Incompatible Change": "pr-backward-incompatible",
+    "Build/Testing/Packaging Improvement": "pr-build",
+    "Build Improvement": "pr-build",
+    "Build/Testing Improvement": "pr-build",
+    "Build": "pr-build",
+    "Packaging Improvement": "pr-build",
+    "Not for changelog (changelog entry is not required)": "pr-not-for-changelog",
+    "Not for changelog": "pr-not-for-changelog",
+    "Documentation (changelog entry is not required)": "pr-documentation",
+    "Documentation": "pr-documentation",
+    # 'Other': doesn't match anything
+}
+
 
 def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
     if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
@@ -168,7 +188,7 @@ def check_pr_description(pr_info):
                 + second_category
                 + "'"
             )
-            return result_status[:140]
+            return result_status[:140], category
 
         elif re.match(
             r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@@ -190,30 +210,57 @@ def check_pr_description(pr_info):
         i += 1
 
     if not category:
-        return "Changelog category is empty"
+        return "Changelog category is empty", category
 
     # Filter out the PR categories that are not for changelog.
     if re.match(
         r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
         category,
     ):
-        return ""
+        return "", category
 
     if not entry:
-        return f"Changelog entry required for category '{category}'"
+        return f"Changelog entry required for category '{category}'", category
 
-    return ""
+    return "", category
 
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
 
-    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
+    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
     can_run, description, labels_state = should_run_checks_for_pr(pr_info)
     gh = Github(get_best_robot_token())
     commit = get_commit(gh, pr_info.sha)
 
-    description_report = check_pr_description(pr_info)[:139]
+    description_report, category = check_pr_description(pr_info)
+    pr_labels_to_add = []
+    pr_labels_to_remove = []
+    if (
+        category in MAP_CATEGORY_TO_LABEL
+        and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels
+    ):
+        pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category])
+
+    for label in pr_info.labels:
+        if (
+            label in MAP_CATEGORY_TO_LABEL.values()
+            and category in MAP_CATEGORY_TO_LABEL
+            and label != MAP_CATEGORY_TO_LABEL[category]
+        ):
+            pr_labels_to_remove.append(label)
+
+    if pr_info.has_changes_in_submodules():
+        pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL)
+    elif SUBMODULE_CHANGED_LABEL in pr_info.labels:
+        pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL)
+
+    if pr_labels_to_add:
+        post_labels(gh, pr_info, pr_labels_to_add)
+
+    if pr_labels_to_remove:
+        remove_labels(gh, pr_info, pr_labels_to_remove)
+
     if description_report:
         print("::notice ::Cannot run, description does not match the template")
         logging.info(
@@ -225,7 +272,7 @@ if __name__ == "__main__":
         )
         commit.create_status(
             context=NAME,
-            description=description_report,
+            description=description_report[:139],
             state="failure",
             target_url=url,
         )
@@ -147,4 +147,8 @@ if __name__ == "__main__":
         report_url,
         CHECK_NAME,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -177,3 +177,6 @@ if __name__ == "__main__":
         check_name,
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -118,3 +118,6 @@ if __name__ == "__main__":
         NAME,
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -173,4 +173,8 @@ if __name__ == "__main__":
         report_url,
         check_name,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -52,6 +52,7 @@ MESSAGES_TO_RETRY = [
     "DB::Exception: New table appeared in database being dropped or detached. Try again",
     "is already started to be removing by another replica right now",
     "DB::Exception: Cannot enqueue query",
+    "environment: line 1: wait_for: No record of process", # Something weird from bash internals, let's just retry
     "is executing longer than distributed_ddl_task_timeout" # FIXME
 ]
 
@@ -116,7 +117,7 @@ def clickhouse_execute_http(base_args, query, timeout=30, settings=None, default
 def clickhouse_execute(base_args, query, timeout=30, settings=None):
     return clickhouse_execute_http(base_args, query, timeout, settings).strip()
 
-def clickhouse_execute_json(base_args, query, timeout=30, settings=None):
+def clickhouse_execute_json(base_args, query, timeout=60, settings=None):
     data = clickhouse_execute_http(base_args, query, timeout, settings, 'JSONEachRow')
     if not data:
         return None
@@ -30,7 +30,7 @@ def test_overcommited_is_killed():
 
     responses_A = list()
     responses_B = list()
-    for _ in range(100):
+    for _ in range(500):
         responses_A.append(node.get_query_request(TEST_QUERY_A, user="A"))
         responses_B.append(node.get_query_request(TEST_QUERY_B, user="B"))
 
tests/integration/test_materialized_view_restart_server/__init__.py (new executable file, 0 lines)
tests/integration/test_materialized_view_restart_server/test.py (new executable file, 25 lines)
@@ -0,0 +1,25 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", stay_alive=True)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_materialized_view_with_subquery(start_cluster):
+    node.query("create table test (x UInt32) engine=TinyLog()")
+    node.query(
+        "create materialized view mv engine = TinyLog() as with subquery as (select * from test) select * from subquery"
+    )
+    node.restart_clickhouse(kill=True)
+    node.query("insert into test select 1")
+    result = node.query("select * from mv")
+    assert int(result) == 1
@@ -517,7 +517,7 @@ def test_put_get_with_globs(started_cluster):
         # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
     ],
 )
-def test_multipart_put(started_cluster, maybe_auth, positive):
+def test_multipart(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster) -> None
 
     bucket = (
@@ -535,8 +535,9 @@ def test_multipart_put(started_cluster, maybe_auth, positive):
 
     one_line_length = 6  # 3 digits, 2 commas, 1 line separator.
 
+    total_rows = csv_size_bytes // one_line_length
     # Generate data having size more than one part
-    int_data = [[1, 2, 3] for i in range(csv_size_bytes // one_line_length)]
+    int_data = [[1, 2, 3] for i in range(total_rows)]
     csv_data = "".join(["{},{},{}\n".format(x, y, z) for x, y, z in int_data])
 
     assert len(csv_data) > min_part_size_bytes
@@ -573,6 +574,37 @@ def test_multipart_put(started_cluster, maybe_auth, positive):
 
     assert csv_data == get_s3_file_content(started_cluster, bucket, filename)
 
+    # select uploaded data from many threads
+    select_query = (
+        "select sum(column1), sum(column2), sum(column3) "
+        "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format(
+            host=started_cluster.minio_redirect_host,
+            port=started_cluster.minio_redirect_port,
+            bucket=bucket,
+            filename=filename,
+            auth=maybe_auth,
+            table_format=table_format,
+        )
+    )
+    try:
+        select_result = run_query(
+            instance,
+            select_query,
+            settings={
+                "max_download_threads": random.randint(4, 16),
+                "max_download_buffer_size": 1024 * 1024,
+            },
+        )
+    except helpers.client.QueryRuntimeException:
+        if positive:
+            raise
+    else:
+        assert positive
+        assert (
+            select_result
+            == "\t".join(map(str, [total_rows, total_rows * 2, total_rows * 3])) + "\n"
+        )
+
 
 def test_remote_host_filter(started_cluster):
     instance = started_cluster.instances["restricted_dummy"]
tests/performance/group_array_sorted.xml (new file, 27 lines)
@@ -0,0 +1,27 @@
+<test>
+    <settings>
+        <max_threads>10</max_threads>
+    </settings>
+    <substitutions>
+        <substitution>
+            <name>items</name>
+            <values>
+                <value>1000</value>
+                <value>100000</value>
+                <value>10000000</value>
+            </values>
+        </substitution>
+    </substitutions>
+
+    <create_query>CREATE TABLE test ( `id` UInt64, `value` UInt64, `text` String ) ENGINE = Memory</create_query>
+    <fill_query>INSERT INTO test SELECT number as id, rand64() as value, toString(number) as text FROM numbers({items})</fill_query>
+    <query>SELECT groupArraySorted(10)(id, value) FROM test</query>
+    <query>SELECT groupArraySorted(10)(text, value) FROM test</query>
+    <query>SELECT groupArraySorted(10)((id, text), value) FROM test</query>
+    <query>SELECT groupArraySorted(10)(text) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(id, value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(text, value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)((id, text), value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(text) FROM test</query>
+    <drop_query>DROP TABLE IF EXISTS test</drop_query>
+</test>
tests/performance/hash_table_sizes_stats.xml (new file, 29 lines)
@@ -0,0 +1,29 @@
+<test>
+    <preconditions>
+        <table_exists>hits_10m_single</table_exists>
+        <table_exists>hits_100m_single</table_exists>
+    </preconditions>
+
+    <settings>
+        <max_size_to_preallocate_for_aggregation>1000000000</max_size_to_preallocate_for_aggregation>
+    </settings>
+
+    <query>SELECT number FROM numbers(5000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers(10000000) GROUP BY number FORMAT Null</query>
+    <query short="1">SELECT number FROM numbers_mt(500000) GROUP BY number FORMAT Null</query>
+    <query short="1">SELECT number FROM numbers_mt(1000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(10000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(50000000) GROUP BY number FORMAT Null</query>
+    <query>WITH number % 524289 AS k, toUInt64(k) AS k1, k1 + 1 AS k2 SELECT k1, k2, count() FROM numbers(100000000) GROUP BY k1, k2 FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(10000000) GROUP BY number FORMAT Null SETTINGS group_by_two_level_threshold = 1e12, group_by_two_level_threshold_bytes = 1e12</query>
+    <query>SELECT number FROM numbers_mt(50000000) GROUP BY number FORMAT Null SETTINGS group_by_two_level_threshold = 1e12, group_by_two_level_threshold_bytes = 1e12</query>
+
+    <query>SELECT WatchID FROM hits_10m_single GROUP BY WatchID FORMAT Null</query>
+    <query>SELECT WatchID FROM hits_100m_single GROUP BY WatchID FORMAT Null</query>
+    <query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_10m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_single WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_single WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT min(MobilePhoneModel) FROM hits_10m_single WHERE MobilePhoneModel != '' GROUP BY intHash32(UserID) % 1000000 FORMAT Null</query>
+    <query>SELECT min(MobilePhoneModel) FROM hits_100m_single WHERE MobilePhoneModel != '' GROUP BY intHash32(UserID) % 1000000 FORMAT Null</query>
+</test>
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: long
+# Tags: long, no-random-settings
 
 set -e
 
@@ -21,3 +21,5 @@ $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 SYNC FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 1 TEST" &>/dev/null
+
+clickhouse_test_wait_queries 60
@@ -1,3 +1,5 @@
+-- Tags: no-random-settings
+
 drop table if exists tab_00484;
 create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
 insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192;
@@ -18,8 +18,8 @@ for format in ${formats}; do
     diff $non_parallel_file $parallel_file
 
     echo $format-2
-    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file
+    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file
-    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file
+    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file
 
     diff $non_parallel_file $parallel_file
 done
@@ -33,15 +33,17 @@ $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(nu
 diff $non_parallel_file $parallel_file
 
 echo "CustomSeparated-2"
-$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
+$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
-$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
+$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
 
 diff $non_parallel_file $parallel_file
 
-echo -ne '{prefix} \n${data}\n $$ suffix $$\n' > "$CUR_DIR"/02122_template_format_resultset.tmp
-echo -ne 'x:${x:Quoted}, y:${y:Quoted}, s:${s:Quoted}' > "$CUR_DIR"/02122_template_format_row.tmp
+resultset_path=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_template_format_resultset.tmp"
+echo -ne '{prefix} \n${data}\n $$ suffix $$\n' > $resultset_path
+
+row_path=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_template_format_row.tmp"
+echo -ne 'x:${x:Quoted}, y:${y:Quoted}, s:${s:Quoted}' > $row_path
 
-TEMPLATE_SETTINGS="SETTINGS format_template_resultset = '$CUR_DIR/02122_template_format_resultset.tmp', format_template_row = '$CUR_DIR/02122_template_format_row.tmp', format_template_rows_between_delimiter = ';\n'"
+TEMPLATE_SETTINGS="SETTINGS format_template_resultset = '$resultset_path', format_template_row = '$row_path', format_template_rows_between_delimiter = ';\n'"
 
 echo "Template-1"
 $CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 > $non_parallel_file
@@ -49,14 +51,14 @@ $CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', t
 
 diff $non_parallel_file $parallel_file
 
-echo -ne '{prefix} \n${data}\n $$ suffix $$\n${totals}\n${min}\n${max}\n${rows:Quoted}\n${rows_before_limit:Quoted}\n${rows_read:Quoted}\n${bytes_read:Quoted}\n' > "$CUR_DIR"/02122_template_format_resultset.tmp
+echo -ne '{prefix} \n${data}\n $$ suffix $$\n${totals}\n${min}\n${max}\n${rows:Quoted}\n${rows_before_limit:Quoted}\n${rows_read:Quoted}\n${bytes_read:Quoted}\n' > $resultset_path
 
 echo "Template-2"
-$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
+$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals order by number limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
-$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
+$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals order by number limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
 
 diff $non_parallel_file $parallel_file
 
 rm $non_parallel_file $parallel_file
-rm "$CUR_DIR"/02122_template_format_resultset.tmp "$CUR_DIR"/02122_template_format_row.tmp
+rm $resultset_path $row_path
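Two independent fixes are visible above: `order by number` makes the row order deterministic, so the byte-for-byte diff between the parallel and the non-parallel formatter output cannot fail on ordering alone, and the template files move from fixed names to `$CLICKHOUSE_TEST_UNIQUE_NAME`-based paths so that concurrent runs of the test do not clobber each other's files. The comparison pattern, reduced to a minimal sketch (query and format are placeholders):

    out_seq=$(mktemp)
    out_par=$(mktemp)
    # The same deterministic query must format identically with and without
    # parallel formatting.
    $CLICKHOUSE_CLIENT -q "select number from numbers(10) order by number format JSONEachRow" \
        --output_format_parallel_formatting=0 > "$out_seq"
    $CLICKHOUSE_CLIENT -q "select number from numbers(10) order by number format JSONEachRow" \
        --output_format_parallel_formatting=1 > "$out_par"
    diff "$out_seq" "$out_par" && echo OK
    rm "$out_seq" "$out_par"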
@@ -0,0 +1,21 @@
1
--
1
--
1
--
1
--
1
1
--
1
--
1
1
--
1
--
1
1
--
tests/queries/0_stateless/02151_hash_table_sizes_stats.sh (Executable file, 90 lines)
@@ -0,0 +1,90 @@
#!/usr/bin/env bash
# Tags: long

# shellcheck disable=SC2154

unset CLICKHOUSE_LOG_COMMENT

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


# the tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
max_threads=5


prepare_table() {
  table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;"
  if [ -z "$1" ]; then
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();"
  else
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;"
  fi
  $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;"
  for ((i = 1; i <= max_threads; i++)); do
    cnt=$((table_size / max_threads))
    from=$(((i - 1) * cnt))
    $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($from, $cnt);"
  done
}

prepare_table_with_sorting_key() {
  prepare_table "$1"
}

run_query() {
  query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q "
    SET max_block_size = $((table_size / 10));
    SET merge_tree_min_rows_for_concurrent_read = 1;
    SET max_untracked_memory = 0;
    SET max_size_to_preallocate_for_aggregation = 1e12;
    $query"
}

check_preallocated_elements() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  min=$1
  if [ -z "$2" ]; then
    max=$1
  else
    max=$2
  fi
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT COUNT(*)
    FROM system.query_log
    WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()
      AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $min AND $max"
}

check_convertion_to_two_level() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads
    FROM system.query_log
    WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()"
}

print_border() {
  echo "--"
}


# shellcheck source=./02151_hash_table_sizes_stats.testcases
source "$CURDIR"/02151_hash_table_sizes_stats.testcases


test_one_thread_simple_group_by
test_one_thread_simple_group_by_with_limit
test_one_thread_simple_group_by_with_join_and_subquery
test_several_threads_simple_group_by_with_limit_single_level_ht
test_several_threads_simple_group_by_with_limit_two_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht
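The two check functions above read per-query ProfileEvents from system.query_log: AggregationPreallocatedElementsInHashTables is the number of hash table slots preallocated from previously collected statistics, and AggregationHashTablesInitializedAsTwoLevel counts hash tables created as two-level right away. A sketch of inspecting both counters for one query (the query_id value is hypothetical, and the type filter is an extra precaution the test itself does not use):

    $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
    $CLICKHOUSE_CLIENT -q "
        SELECT
            ProfileEvents['AggregationPreallocatedElementsInHashTables'] AS preallocated,
            ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']  AS two_level
        FROM system.query_log
        WHERE event_date >= yesterday()
          AND query_id = 'some_query_id'
          AND type = 'QueryFinish'"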
tests/queries/0_stateless/02151_hash_table_sizes_stats.testcases (Normal file, 195 lines)
@@ -0,0 +1,195 @@
test_one_thread_simple_group_by() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint --
    SELECT number
    FROM $table_name
    GROUP BY number
    SETTINGS max_threads = 1
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_one_thread_simple_group_by_with_limit() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = 1
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_one_thread_simple_group_by_with_join_and_subquery() {
  expected_size_hint=$((table_size + table_size / 2))
  prepare_table

  query="
    -- expected two size_hints for different keys: for the inner ($table_size) and the outer aggregation ($((table_size / 2)))
    SELECT number
    FROM $table_name AS t1
    JOIN
    (
      SELECT number
      FROM $table_name AS t2
      GROUP BY number
      LIMIT $((table_size / 2))
    ) AS t3 USING(number)
    GROUP BY number
    SETTINGS max_threads = 1,
             distributed_product_mode = 'local'
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_several_threads_simple_group_by_with_limit_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH ROLLUP
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH ROLLUP
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH CUBE
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH CUBE
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}
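Every testcase runs its query twice on purpose: the first run executes without a size hint and lets the server record the observed hash table sizes, and the second run picks the recorded statistics up, so the preallocation check effectively asserts on the second run. For the multi-threaded cases the expectation is a range rather than an exact value, since rows may split across threads unevenly. The skeleton shared by all nine functions (names as defined in the files above):

    expected_size_hint=$table_size
    prepare_table
    query="SELECT number FROM $table_name GROUP BY number SETTINGS max_threads = 1 FORMAT Null;"
    run_query                                        # cold run: statistics get collected
    run_query                                        # warm run: the size hint is applied
    check_preallocated_elements $expected_size_hint  # asserts via ProfileEvents
    print_border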
@@ -0,0 +1,33 @@
1
1
--
1
1
--
1
1
--
1
1
--
1
1
1
1
--
1
1
--
1
1
1
1
--
1
1
--
1
1
1
1
--
tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh (Executable file, 97 lines)
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Tags: long, distributed

# These tests don't use the `current_database = currentDatabase()` condition because the database name isn't propagated to remote queries.

# shellcheck disable=SC2154

unset CLICKHOUSE_LOG_COMMENT

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


# the tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
max_threads=5


prepare_table() {
  table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;"
  if [ -z "$1" ]; then
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();"
  else
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;"
  fi
  $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;"
  for ((i = 1; i <= max_threads; i++)); do
    cnt=$((table_size / max_threads))
    from=$(((i - 1) * cnt))
    $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($from, $cnt);"
  done
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${table_name}_d;"
  $CLICKHOUSE_CLIENT -q "CREATE TABLE ${table_name}_d AS $table_name ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), $table_name);"
  table_name="${table_name}_d"
}

prepare_table_with_sorting_key() {
  prepare_table "$1"
}

run_query() {
  query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q "
    SET max_block_size = $((table_size / 10));
    SET merge_tree_min_rows_for_concurrent_read = 1;
    SET max_untracked_memory = 0;
    SET prefer_localhost_replica = 1;
    $query"
}

check_preallocated_elements() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  min=$1
  if [ -z "$2" ]; then
    max=$1
  else
    max=$2
  fi
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT COUNT(*)
    FROM system.query_log
    WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String})
      AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $min AND $max
    GROUP BY query_id"
}

check_convertion_to_two_level() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads
    FROM system.query_log
    WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String})
    GROUP BY query_id"
}

print_border() {
  echo "--"
}


# shellcheck source=./02151_hash_table_sizes_stats.testcases
source "$CURDIR"/02151_hash_table_sizes_stats.testcases


test_one_thread_simple_group_by
test_one_thread_simple_group_by_with_limit
test_one_thread_simple_group_by_with_join_and_subquery
test_several_threads_simple_group_by_with_limit_single_level_ht
test_several_threads_simple_group_by_with_limit_two_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht
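In the distributed variant the query fans out to shards that execute under their own query_ids, so the checks match initial_query_id in addition to query_id and aggregate per query_id; the current_database filter is dropped because, as the comment at the top notes, the database name is not propagated to remote queries. The matching condition in isolation (the id value is hypothetical):

    $CLICKHOUSE_CLIENT -q "
        SELECT query_id, ProfileEvents['AggregationPreallocatedElementsInHashTables']
        FROM system.query_log
        WHERE event_date >= yesterday()
          AND (query_id = 'some_query_id' OR initial_query_id = 'some_query_id')"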
@@ -23,7 +23,7 @@ function run_and_check()
     echo "Checking $*"
 
     # Run query with external table (implicit StorageMemory user)
-    $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&query_id=$query_id&$*" -o /dev/null
+    $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null
 
     ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS'
 
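max_untracked_memory=0 makes every allocation go through the memory tracker immediately instead of being accumulated per thread first, so together with memory_profiler_sample_probability=1 the memory profiler observes each allocation and the check no longer depends on allocation batching. The same pair of settings through the native client rather than the HTTP interface (the query itself is a placeholder):

    $CLICKHOUSE_CLIENT \
        --memory_profiler_sample_probability=1 \
        --max_untracked_memory=0 \
        -q "SELECT count() FROM numbers(1000000)"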
tests/queries/0_stateless/02158_grouparraysorted.reference (Normal file, 18 lines)
@@ -0,0 +1,18 @@
[0,1,2,3,4]
[0,1,2,3,4,5,6,7,8,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99]
[999,998,997,996,995,994,993,992,991,990,989,988,987,986,985,984,983,982,981,980,979,978,977,976,975,974,973,972,971,970,969,968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937,936,935,934,933,932,931,930,929,928,927,926,925,924,923,922,921,920,919,918,917,916,915,914,913,912,911,910,909,908,907,906,905,904,903,902,901,900]
['0','1','2','3','4']
['0','1','2','3','4']
['9','8','7','6','5']
[(0,'0'),(1,'1'),(2,'2'),(3,'3'),(4,'4')]
['0','1','10','11','12']
['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49']
[0,0,1,1,2,2,3,3,4,4]
pablo [1,2]
luis [1,3]
pablo [1,2]
luis [1,3]
[4,5,6,7,8]
[10,11,12,13,14]
['10','11','12','13','14']
tests/queries/0_stateless/02158_grouparraysorted.sql (Normal file, 43 lines)
@@ -0,0 +1,43 @@
SELECT groupArraySorted(5)(number) from numbers(100);

SELECT groupArraySorted(number, number) from numbers(100);

SELECT groupArraySorted(100)(number, number) from numbers(1000);

SELECT groupArraySorted(100)(number, -number) from numbers(1000);

SELECT groupArraySorted(5)(str, number) FROM (SELECT toString(number) as str, number FROM numbers(10));

SELECT groupArraySorted(5)(text) FROM (select toString(number) as text from numbers(10));

SELECT groupArraySorted(5)(text, -number) FROM (select toString(number) as text, number from numbers(10));

SELECT groupArraySorted(5)((number,text)) from (SELECT toString(number) as text, number FROM numbers(100));

SELECT groupArraySorted(5)(text,text) from (SELECT toString(number) as text FROM numbers(100));

SELECT groupArraySorted(50)(text,(number,text)) from (SELECT toString(number) as text, number FROM numbers(100));

SELECT groupArraySorted(10)(toInt64(number/2)) FROM numbers(100);


DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64, Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h, h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;

DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;
DROP TABLE test;
DROP VIEW mv_test;

SELECT groupArraySortedIf(5)(number, number, number>3) from numbers(100);
SELECT groupArraySortedIf(5)(number, toString(number), number>3) from numbers(100);
SELECT groupArraySortedIf(5)(toString(number), number>3) from numbers(100);
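groupArraySorted(N)(value[, key]) keeps the N smallest entries ordered by the key (or by the value itself when the key is omitted); negating the key, as several statements above do, turns it into "N largest". Two forms side by side, with the outputs they produce per the reference file:

    # Five smallest values:
    $CLICKHOUSE_CLIENT -q "SELECT groupArraySorted(5)(number) FROM numbers(100)"
    # -> [0,1,2,3,4]
    # Five largest, by sorting on the negated key:
    $CLICKHOUSE_CLIENT -q "SELECT groupArraySorted(5)(toString(number), -number) FROM numbers(10)"
    # -> ['9','8','7','6','5']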
@@ -5,6 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+function cleanup()
+{
+    # this command expects an error message like 'Code: 107. DB::Exception: Received <...> nonexist.txt doesn't exist. (FILE_DOESNT_EXIST)'
+    user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
+    rm $user_files_path/test_02167.*
+}
+trap cleanup EXIT
+
 for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes
 do
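The cleanup() added above relies on a common stateless-test trick: there is no direct way to ask the server for user_files_path, so the script queries a nonexistent file and parses the path out of the FILE_DOESNT_EXIST error message. The trick in isolation (taken verbatim from the hunk):

    # Provoke 'Code: 107 ... FILE_DOESNT_EXIST' and cut the server-side directory
    # out of the error text.
    user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 \
        | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 \
        | awk '{gsub("/nonexist.txt","",$9); print $9}')
    echo "$user_files_path"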
@@ -0,0 +1 @@
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
File diff suppressed because one or more lines are too long
@@ -0,0 +1,6 @@
true
\N
0.0.0.0
\N
::ffff:127.0.0.1
\N
@@ -0,0 +1,6 @@
select toString(toNullable(true));
select toString(CAST(NULL, 'Nullable(Bool)'));
select toString(toNullable(toIPv4('0.0.0.0')));
select toString(CAST(NULL, 'Nullable(IPv4)'));
select toString(toNullable(toIPv6('::ffff:127.0.0.1')));
select toString(CAST(NULL, 'Nullable(IPv6)'));
@@ -1,4 +1,4 @@
--- Tags: no-replicated-database
+-- Tags: no-replicated-database, no-parallel
 -- Tag no-replicated-database: Does not support renaming of multiple tables in single query
 
 RENAME TABLE test.hits TO test.visits_tmp, test.visits TO test.hits, test.visits_tmp TO test.visits;
@@ -1,4 +1,4 @@
--- Tags: no-tsan, no-replicated-database
+-- Tags: no-tsan, no-replicated-database, no-parallel
 -- Tag no-replicated-database: Fails due to additional replicas or shards
 
 DROP TABLE IF EXISTS fixed_granularity_table;
@@ -1,4 +1,5 @@
 -- { echo }
+
 SET max_memory_usage='20G';
 SELECT count() FROM test.hits_s3;
 8873898
@@ -1,4 +1,7 @@
+-- Tags: no-parallel
+
 -- { echo }
+
 SET max_memory_usage='20G';
 SELECT count() FROM test.hits_s3;
 SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0;
@@ -1 +0,0 @@
-# -*- coding: utf-8 -*-
@@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-

try:
    from clickhouse.utils.github.cherrypick import CherryPick
    from clickhouse.utils.github.query import Query as RemoteRepo
    from clickhouse.utils.github.local import Repository as LocalRepo
except:
    from .cherrypick import CherryPick
    from .query import Query as RemoteRepo
    from .local import Repository as LocalRepo

import argparse
import logging
import re
import sys


class Backport:
    def __init__(self, token, owner, name, team):
        self._gh = RemoteRepo(
            token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
        )
        self._token = token
        self.default_branch_name = self._gh.default_branch
        self.ssh_url = self._gh.ssh_url

    def getPullRequests(self, from_commit):
        return self._gh.get_pull_requests(from_commit)

    def getBranchesWithRelease(self):
        branches = set()
        for pull_request in self._gh.find_pull_requests("release"):
            branches.add(pull_request["headRefName"])
        return branches

    def execute(self, repo, upstream, until_commit, run_cherrypick):
        repo = LocalRepo(repo, upstream, self.default_branch_name)
        all_branches = repo.get_release_branches()  # [(branch_name, base_commit)]

        release_branches = self.getBranchesWithRelease()

        branches = []
        # iterate over all branches to preserve their precedence.
        for branch in all_branches:
            if branch[0] in release_branches:
                branches.append(branch)

        if not branches:
            logging.info("No release branches found!")
            return

        for branch in branches:
            logging.info("Found release branch: %s", branch[0])

        if not until_commit:
            until_commit = branches[0][1]
        pull_requests = self.getPullRequests(until_commit)

        backport_map = {}

        RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
        RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
        RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")

        # pull-requests are sorted by ancestry from the most recent.
        for pr in pull_requests:
            while repo.comparator(branches[-1][1]) >= repo.comparator(
                pr["mergeCommit"]["oid"]
            ):
                logging.info(
                    "PR #{} is already inside {}. Dropping this branch for further PRs".format(
                        pr["number"], branches[-1][0]
                    )
                )
                branches.pop()

            logging.info("Processing PR #{}".format(pr["number"]))

            assert len(branches)

            branch_set = set([branch[0] for branch in branches])

            # First pass. Find all must-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-must-backport":
                    backport_map[pr["number"]] = branch_set.copy()
                    continue
                matched = RE_MUST_BACKPORT.match(label["name"])
                if matched:
                    if pr["number"] not in backport_map:
                        backport_map[pr["number"]] = set()
                    backport_map[pr["number"]].add(matched.group(1))

            # Second pass. Find all no-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
                    del backport_map[pr["number"]]
                    break
                matched_no_backport = RE_NO_BACKPORT.match(label["name"])
                matched_backported = RE_BACKPORTED.match(label["name"])
                if (
                    matched_no_backport
                    and pr["number"] in backport_map
                    and matched_no_backport.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_no_backport.group(1))
                    logging.info(
                        "\tskipping %s because of forced no-backport",
                        matched_no_backport.group(1),
                    )
                elif (
                    matched_backported
                    and pr["number"] in backport_map
                    and matched_backported.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_backported.group(1))
                    logging.info(
                        "\tskipping %s because it's already backported manually",
                        matched_backported.group(1),
                    )

        for pr, branches in list(backport_map.items()):
            logging.info("PR #%s needs to be backported to:", pr)
            for branch in branches:
                logging.info(
                    "\t%s, and the status is: %s",
                    branch,
                    run_cherrypick(self._token, pr, branch),
                )

        # print API costs
        logging.info("\nGitHub API total costs per query:")
        for name, value in list(self._gh.api_costs.items()):
            logging.info("%s : %s", name, value)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="token for Github access"
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    parser.add_argument(
        "--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="do not create or merge any PRs",
        default=False,
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="more verbose output",
        default=False,
    )
    parser.add_argument(
        "--upstream",
        "-u",
        type=str,
        help="remote name of upstream in repository",
        default="origin",
    )
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(
            format="%(message)s", stream=sys.stdout, level=logging.DEBUG
        )
    else:
        logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)

    cherrypick_run = lambda token, pr, branch: CherryPick(
        token, "ClickHouse", "ClickHouse", "core", pr, branch
    ).execute(args.repo, args.dry_run)
    bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
    bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
@@ -1,323 +0,0 @@
# -*- coding: utf-8 -*-

"""
Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation.

First run should do the following:
1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
3. Create PR from temporary branch to backport branch (emulating cherry-pick).

Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.

Third run creates PR from backport branch (with merged previous PR) to release branch.
"""

try:
    from clickhouse.utils.github.query import Query as RemoteRepo
except:
    from .query import Query as RemoteRepo

import argparse
from enum import Enum
import logging
import os
import subprocess
import sys


class CherryPick:
    class Status(Enum):
        DISCARDED = "discarded"
        NOT_INITIATED = "not started"
        FIRST_MERGEABLE = "waiting for 1st stage"
        FIRST_CONFLICTS = "conflicts on 1st stage"
        SECOND_MERGEABLE = "waiting for 2nd stage"
        SECOND_CONFLICTS = "conflicts on 2nd stage"
        MERGED = "backported"

    def _run(self, args):
        out = subprocess.check_output(args).rstrip()
        logging.debug(out)
        return out

    def __init__(self, token, owner, name, team, pr_number, target_branch):
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
        self._pr = self._gh.get_pull_request(pr_number)

        self.ssh_url = self._gh.ssh_url

        # TODO: check if pull-request is merged.

        self.merge_commit_oid = self._pr["mergeCommit"]["oid"]

        self.target_branch = target_branch
        self.backport_branch = "backport/{branch}/{pr}".format(
            branch=target_branch, pr=pr_number
        )
        self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
            branch=target_branch, oid=self.merge_commit_oid
        )

    def getCherryPickPullRequest(self):
        return self._gh.find_pull_request(
            base=self.backport_branch, head=self.cherrypick_branch
        )

    def createCherryPickPullRequest(self, repo_path):
        DESCRIPTION = (
            "This pull-request is a first step of an automated backporting.\n"
            "It contains changes like after calling a local command `git cherry-pick`.\n"
            "If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
            "Otherwise, if you do not want to backport them, then just close this pull-request.\n"
            "\n"
            "The check results does not matter at this step - you can safely ignore them.\n"
            "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
        )

        # FIXME: replace with something better than os.system()
        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@yandex-team.ru",
            "-c",
            "user.name=robot-clickhouse",
        ]
        base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]

        # Create separate branch for backporting, and make it look like real cherry-pick.
        self._run(git_prefix + ["checkout", "-f", self.target_branch])
        self._run(git_prefix + ["checkout", "-B", self.backport_branch])
        self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])

        # Create secondary branch to allow pull request with cherry-picked commit.
        self._run(
            git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
        )

        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.cherrypick_branch),
            ]
        )

        # Create pull-request like a local cherry-pick
        pr = self._gh.create_pull_request(
            source=self.cherrypick_branch,
            target=self.backport_branch,
            title="Cherry pick #{number} to {target}: {title}".format(
                number=self._pr["number"],
                target=self.target_branch,
                title=self._pr["title"].replace('"', '\\"'),
            ),
            description="Original pull-request #{}\n\n{}".format(
                self._pr["number"], DESCRIPTION
            ),
        )

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])

        self._gh.set_label(pr, "do not test")
        self._gh.set_label(pr, "pr-cherrypick")

        return pr

    def mergeCherryPickPullRequest(self, cherrypick_pr):
        return self._gh.merge_pull_request(cherrypick_pr["id"])

    def getBackportPullRequest(self):
        return self._gh.find_pull_request(
            base=self.target_branch, head=self.backport_branch
        )

    def createBackportPullRequest(self, cherrypick_pr, repo_path):
        DESCRIPTION = (
            "This pull-request is a last step of an automated backporting.\n"
            "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
            "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
        )

        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@clickhouse.com",
            "-c",
            "user.name=robot-clickhouse",
        ]

        pr_title = "Backport #{number} to {target}: {title}".format(
            number=self._pr["number"],
            target=self.target_branch,
            title=self._pr["title"].replace('"', '\\"'),
        )

        self._run(git_prefix + ["checkout", "-f", self.backport_branch])
        self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
        self._run(
            git_prefix
            + [
                "reset",
                "--soft",
                self._run(
                    git_prefix
                    + [
                        "merge-base",
                        "origin/" + self.target_branch,
                        self.backport_branch,
                    ]
                ),
            ]
        )
        self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )

        pr = self._gh.create_pull_request(
            source=self.backport_branch,
            target=self.target_branch,
            title=pr_title,
            description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
                self._pr["number"], cherrypick_pr["number"], DESCRIPTION
            ),
        )

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])

        self._gh.set_label(pr, "pr-backport")

        return pr

    def execute(self, repo_path, dry_run=False):
        pr1 = self.getCherryPickPullRequest()
        if not pr1:
            if not dry_run:
                pr1 = self.createCherryPickPullRequest(repo_path)
                logging.debug(
                    "Created PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )
            else:
                return CherryPick.Status.NOT_INITIATED
        else:
            logging.debug(
                "Found PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )

        if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
            if not dry_run:
                pr1 = self.mergeCherryPickPullRequest(pr1)
                logging.debug(
                    "Merged PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )

        if not pr1["merged"]:
            logging.debug(
                "Waiting for PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )

            if pr1["closed"]:
                return CherryPick.Status.DISCARDED
            elif pr1["mergeable"] == "CONFLICTING":
                return CherryPick.Status.FIRST_CONFLICTS
            else:
                return CherryPick.Status.FIRST_MERGEABLE

        pr2 = self.getBackportPullRequest()
        if not pr2:
            if not dry_run:
                pr2 = self.createBackportPullRequest(pr1, repo_path)
                logging.debug(
                    "Created PR with backport of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr2["url"],
                )
            else:
                return CherryPick.Status.FIRST_MERGEABLE
        else:
            logging.debug(
                "Found PR with backport of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr2["url"],
            )

        if pr2["merged"]:
            return CherryPick.Status.MERGED
        elif pr2["closed"]:
            return CherryPick.Status.DISCARDED
        elif pr2["mergeable"] == "CONFLICTING":
            return CherryPick.Status.SECOND_CONFLICTS
        else:
            return CherryPick.Status.SECOND_MERGEABLE


if __name__ == "__main__":
    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", "-t", type=str, required=True, help="token for Github access"
    )
    parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
    parser.add_argument(
        "--branch",
        "-b",
        type=str,
        required=True,
        help="target branch name for cherry-pick",
    )
    parser.add_argument(
        "--repo",
        "-r",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    args = parser.parse_args()

    cp = CherryPick(
        args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
    )
    cp.execute(args.repo)
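The git choreography that the deleted CherryPick class drove can be condensed into a few commands; a rough sketch of its first stage, assuming a hypothetical PR number 12345 targeting a hypothetical release branch 22.3, with $BASE the first parent of the PR's merge commit and $MERGE_COMMIT the merge commit itself (branch names follow the class's templates):

    git checkout -f 22.3                              # the target release branch
    git checkout -B backport/22.3/12345               # backport/{branch}/{pr}
    git merge -s ours --no-edit "$BASE"               # keep release content, record ancestry
    git branch -f "cherrypick/22.3/$MERGE_COMMIT" "$MERGE_COMMIT"
    git push -f origin "backport/22.3/12345" "cherrypick/22.3/$MERGE_COMMIT"
    # A PR from cherrypick/* onto backport/* then emulates `git cherry-pick`;
    # the later runs merge it and open the final PR onto 22.3.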
@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-

import functools
import logging
import os
import re


class RepositoryBase:
    def __init__(self, repo_path):
        import git

        self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))

        # comparator of commits
        def cmp(x, y):
            if str(x) == str(y):
                return 0
            if self._repo.is_ancestor(x, y):
                return -1
            else:
                return 1

        self.comparator = functools.cmp_to_key(cmp)

    def get_head_commit(self):
        return self._repo.commit(self._default)

    def iterate(self, begin, end):
        rev_range = "{}...{}".format(begin, end)
        for commit in self._repo.iter_commits(rev_range, first_parent=True):
            yield commit


class Repository(RepositoryBase):
    def __init__(self, repo_path, remote_name, default_branch_name):
        super(Repository, self).__init__(repo_path)
        self._remote = self._repo.remotes[remote_name]
        self._remote.fetch()
        self._default = self._remote.refs[default_branch_name]

    def get_release_branches(self):
        """
        Returns sorted list of tuples:
          * remote branch (git.refs.remote.RemoteReference),
          * base commit (git.Commit),
          * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []

        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")

        for branch in [
            r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))

        return sorted(release_branches, key=lambda x: self.comparator(x[1]))


class BareRepository(RepositoryBase):
    def __init__(self, repo_path, default_branch_name):
        super(BareRepository, self).__init__(repo_path)
        self._default = self._repo.branches[default_branch_name]

    def get_release_branches(self):
        """
        Returns sorted list of tuples:
          * branch (git.refs.head?),
          * base commit (git.Commit),
          * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []

        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")

        for branch in [
            r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))

        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
@@ -1,64 +0,0 @@
# -*- coding: utf-8 -*-


class Description:
    """Parsed description representation"""

    MAP_CATEGORY_TO_LABEL = {
        "New Feature": "pr-feature",
        "Bug Fix": "pr-bugfix",
        "Improvement": "pr-improvement",
        "Performance Improvement": "pr-performance",
        # 'Backward Incompatible Change': doesn't match anything
        "Build/Testing/Packaging Improvement": "pr-build",
        "Non-significant (changelog entry is not needed)": "pr-non-significant",
        "Non-significant (changelog entry is not required)": "pr-non-significant",
        "Non-significant": "pr-non-significant",
        "Documentation (changelog entry is not required)": "pr-documentation",
        # 'Other': doesn't match anything
    }

    def __init__(self, pull_request):
        self.label_name = str()
        self.legal = False

        self._parse(pull_request["bodyText"])

    def _parse(self, text):
        lines = text.splitlines()
        next_category = False
        category = str()

        for line in lines:
            stripped = line.strip()

            if not stripped:
                continue

            if next_category:
                category = stripped
                next_category = False

            if (
                stripped
                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
            ):
                self.legal = True

            category_headers = (
                "Category (leave one):",
                "Changelog category (leave one):",
                "Changelog category:",
                "Category:",
            )

            if stripped in category_headers:
                next_category = True

        if category in Description.MAP_CATEGORY_TO_LABEL:
            self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
        else:
            if not category:
                print("Cannot find category in pr description")
            else:
                print(("Unknown category: " + category))
@@ -1,492 +0,0 @@
# -*- coding: utf-8 -*-

import requests


class Query:
    """
    Implements queries to the Github API using GraphQL
    """

    _PULL_REQUEST = """
        author {{
            ... on User {{
                id
                login
            }}
        }}

        baseRepository {{
            nameWithOwner
        }}

        mergeCommit {{
            oid
            parents(first: {min_page_size}) {{
                totalCount
                nodes {{
                    oid
                }}
            }}
        }}

        mergedBy {{
            ... on User {{
                id
                login
            }}
        }}

        baseRefName
        closed
        headRefName
        id
        mergeable
        merged
        number
        title
        url
    """

    def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
        self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)

        self._token = token
        self._owner = owner
        self._name = name
        self._team = team

        self._max_page_size = max_page_size
        self._min_page_size = min_page_size

        self.api_costs = {}

        repo = self.get_repository()
        self._id = repo["id"]
        self.ssh_url = repo["sshUrl"]
        self.default_branch = repo["defaultBranchRef"]["name"]

        self.members = set(self.get_members())

    def get_repository(self):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    name
                }}
                id
                sshUrl
            }}
        """

        query = _QUERY.format(owner=self._owner, name=self._name)
        return self._run(query)["repository"]

    def get_members(self):
        """Get all team members for organization

        Returns:
            members: a map of members' logins to ids
        """

        _QUERY = """
            organization(login: "{organization}") {{
                team(slug: "{team}") {{
                    members(first: {max_page_size} {next}) {{
                        pageInfo {{
                            hasNextPage
                            endCursor
                        }}
                        nodes {{
                            id
                            login
                        }}
                    }}
                }}
            }}
        """

        members = {}
        not_end = True
        query = _QUERY.format(
            organization=self._owner,
            team=self._team,
            max_page_size=self._max_page_size,
            next="",
        )

        while not_end:
            result = self._run(query)["organization"]["team"]
            if result is None:
                break
            result = result["members"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                organization=self._owner,
                team=self._team,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            # `dict` does not support `+=`; merge each page into the map instead.
            members.update({node["login"]: node["id"] for node in result["nodes"]})

        return members

    def get_pull_request(self, number):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequest(number: {number}) {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            number=number,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequest"]

    def find_pull_request(self, base, head):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
                    nodes {{
                        {pull_request_data}
                    }}
                    totalCount
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            base=base,
            head=head,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        result = self._run(query)["repository"]["pullRequests"]
        if result["totalCount"] > 0:
            return result["nodes"][0]
        else:
            return {}

    def find_pull_requests(self, label_name):
        """
        Get all pull-requests filtered by label name
        """
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
                    nodes {{
                        {pull_request_data}
                    }}
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequests"]["nodes"]

    def get_pull_requests(self, before_commit):
        """
        Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
        """

        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    target {{
                        ... on Commit {{
                            history(first: {max_page_size} {next}) {{
                                pageInfo {{
                                    hasNextPage
                                    endCursor
                                }}
                                nodes {{
                                    oid
                                    associatedPullRequests(first: {min_page_size}) {{
                                        totalCount
                                        nodes {{
                                            ... on PullRequest {{
                                                {pull_request_data}

                                                labels(first: {min_page_size}) {{
                                                    totalCount
                                                    pageInfo {{
                                                        hasNextPage
                                                        endCursor
                                                    }}
                                                    nodes {{
                                                        name
                                                        color
                                                    }}
                                                }}
                                            }}
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        """

        pull_requests = []
        not_end = True
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            max_page_size=self._max_page_size,
            min_page_size=self._min_page_size,
            pull_request_data=self._PULL_REQUEST,
            next="",
        )

        while not_end:
            result = self._run(query)["repository"]["defaultBranchRef"]["target"][
                "history"
            ]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                owner=self._owner,
                name=self._name,
                max_page_size=self._max_page_size,
                min_page_size=self._min_page_size,
                pull_request_data=self._PULL_REQUEST,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            for commit in result["nodes"]:
                # FIXME: maybe include `before_commit`?
                if str(commit["oid"]) == str(before_commit):
                    not_end = False
                    break

                # TODO: fetch all pull-requests that were merged in a single commit.
                assert (
                    commit["associatedPullRequests"]["totalCount"]
                    <= self._min_page_size
                )

                for pull_request in commit["associatedPullRequests"]["nodes"]:
                    if (
                        pull_request["baseRepository"]["nameWithOwner"]
                        == "{}/{}".format(self._owner, self._name)
                        and pull_request["baseRefName"] == self.default_branch
                        and pull_request["mergeCommit"]["oid"] == commit["oid"]
                    ):
                        pull_requests.append(pull_request)

        return pull_requests

    def create_pull_request(
        self, source, target, title, description="", draft=False, can_modify=True
    ):
        _QUERY = """
            createPullRequest(input: {{
                baseRefName: "{target}",
                headRefName: "{source}",
                repositoryId: "{id}",
                title: "{title}",
                body: "{body}",
                draft: {draft},
                maintainerCanModify: {modify}
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(
            target=target,
            source=source,
            id=self._id,
            title=title,
            body=description,
            draft="true" if draft else "false",
            modify="true" if can_modify else "false",
            pull_request_data=self._PULL_REQUEST,
        )
        return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]

    def merge_pull_request(self, id):
        _QUERY = """
            mergePullRequest(input: {{
                pullRequestId: "{id}"
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
        return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]

    # FIXME: figure out how to add more assignees at once
    def add_assignee(self, pr, assignee):
        _QUERY = """
            addAssigneesToAssignable(input: {{
                assignableId: "{id1}",
                assigneeIds: "{id2}"
            }}) {{
                clientMutationId
            }}
        """

        query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
        self._run(query, is_mutation=True)

    def set_label(self, pull_request, label_name):
        """
        Set label by name to the pull request

        Args:
            pull_request: JSON object returned by `get_pull_requests()`
            label_name (string): label name
        """

        _GET_LABEL = """
            repository(owner: "{owner}" name: "{name}") {{
                labels(first: {max_page_size} {next} query: "{label_name}") {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        id
                        name
                        color
                    }}
                }}
            }}
        """

        _SET_LABEL = """
            addLabelsToLabelable(input: {{
                labelableId: "{pr_id}",
                labelIds: "{label_id}"
            }}) {{
                clientMutationId
            }}
        """

        labels = []
        not_end = True
        query = _GET_LABEL.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            max_page_size=self._max_page_size,
            next="",
        )

        while not_end:
            result = self._run(query)["repository"]["labels"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _GET_LABEL.format(
                owner=self._owner,
                name=self._name,
                label_name=label_name,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            labels += [label for label in result["nodes"]]

        if not labels:
            return

        query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
        self._run(query, is_mutation=True)

    def _run(self, query, is_mutation=False):
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        def requests_retry_session(
            retries=3,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504),
            session=None,
        ):
            session = session or requests.Session()
            retry = Retry(
                total=retries,
                read=retries,
                connect=retries,
                backoff_factor=backoff_factor,
                status_forcelist=status_forcelist,
            )
            adapter = HTTPAdapter(max_retries=retry)
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            return session

        headers = {"Authorization": "bearer {}".format(self._token)}
        if is_mutation:
            query = """
                mutation {{
                    {query}
                }}
            """.format(
                query=query
            )
        else:
            query = """
                query {{
                    {query}
                    rateLimit {{
                        cost
                        remaining
                    }}
                }}
            """.format(
                query=query
            )

        while True:
            request = requests_retry_session().post(
                "https://api.github.com/graphql", json={"query": query}, headers=headers
            )
            if request.status_code == 200:
                result = request.json()
                if "errors" in result:
                    raise Exception(
                        "Errors occurred: {}\nOriginal query: {}".format(
                            result["errors"], query
                        )
                    )

                if not is_mutation:
                    import inspect

                    caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
                    if caller not in list(self.api_costs.keys()):
                        self.api_costs[caller] = 0
                    self.api_costs[caller] += result["data"]["rateLimit"]["cost"]

                return result["data"]
            else:
                import json

                raise Exception(
                    "Query failed with code {code}:\n{json}".format(
                        code=request.status_code,
                        json=json.dumps(request.json(), indent=4),
                    )
                )
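A minimal usage sketch for the Query class above. The token source, team slug, and PR number are placeholders, not values from this commit; `api_costs` is the per-caller GraphQL rate-limit accounting maintained by `_run()`.

import os

query = Query(
    token=os.environ["GITHUB_TOKEN"],  # hypothetical: a personal access token
    owner="ClickHouse",
    name="ClickHouse",
    team="core",  # hypothetical team slug
)

pr = query.get_pull_request(1)  # hypothetical PR number
print(pr["title"], pr["url"])
print(query.api_costs)  # GraphQL costs accumulated per calling method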
@@ -84,14 +84,14 @@ let render_data_query = `
 SELECT groupArray([d, n, fail]) FROM
 (
 SELECT n, check_start_time::Date - start_date AS d, max(test_status LIKE 'F%' OR test_status LIKE 'E%') AS fail
-FROM "gh-data".checks
+FROM "default".checks

 INNER JOIN
 (
 SELECT test_name, toUInt16(rowNumberInAllBlocks()) AS n FROM
 (
 SELECT DISTINCT test_name
-FROM "gh-data".checks
+FROM "default".checks
 WHERE match(test_name, '^\\d+_') AND check_name ILIKE '%stateless%' AND check_start_time > now() - INTERVAL 1 DAY
 ORDER BY test_name
 )
@@ -112,7 +112,7 @@ let test_names_query = `
 SELECT test_name, toUInt16(rowNumberInAllBlocks()) AS n FROM
 (
 SELECT DISTINCT test_name
-FROM "gh-data".checks
+FROM "default".checks
 WHERE match(test_name, '^\\d+_') AND check_name ILIKE '%stateless%' AND check_start_time > now() - INTERVAL 1 DAY
 ORDER BY test_name
 ) FORMAT JSONCompact`;
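These dashboard queries now read the `checks` table from the `default` database instead of `gh-data`. As a rough sketch of replaying the inner test-name query outside the browser (the endpoint and user below are assumptions, not part of this commit), one could use ClickHouse's HTTP interface:

import requests

query = """
SELECT DISTINCT test_name
FROM "default".checks
WHERE match(test_name, '^\\d+_')
  AND check_name ILIKE '%stateless%'
  AND check_start_time > now() - INTERVAL 1 DAY
ORDER BY test_name
FORMAT JSONCompact
"""

response = requests.post(
    "https://play.clickhouse.com/",  # hypothetical endpoint
    params={"user": "play"},         # hypothetical read-only user
    data=query,
    timeout=30,
)
response.raise_for_status()
print(response.json()["data"][:5])  # first few matching test names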