Merge branch 'master' into keeper-retries-by-default

Igor Nikonov 2023-02-21 17:59:55 +01:00 committed by GitHub
commit bf05082984
110 changed files with 2293 additions and 245 deletions

.gitignore
View File

@ -161,6 +161,7 @@ website/package-lock.json
tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
tests/queries/0_stateless/*.generated-expect
tests/queries/0_stateless/*.expect.history
# rust
/rust/**/target

@ -1 +1 @@
Subproject commit e61a81aa6fc529b469e2a54b7ce788606e138b5d
Subproject commit a8bf69e9cd39a23140a2b633c172d201484172da

View File

@ -40,12 +40,16 @@ def get_options(i, backward_compatibility_check):
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
if i == 13:
if i % 5 == 1:
client_options.append("memory_tracker_fault_probability=0.001")
if i % 2 == 1 and not backward_compatibility_check:
client_options.append("group_by_use_nulls=1")
if i == 12: # 12 % 3 == 0, so it's Atomic database
client_options.append("implicit_transaction=1")
client_options.append("throw_on_unsupported_query_inside_transaction=0")
if client_options:
options.append(" --client-option " + " ".join(client_options))
@ -74,7 +78,7 @@ def run_func_test(
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], "w")
full_command = "{} {} {} {} {} --stress".format(
full_command = "{} {} {} {} {}".format(
cmd,
get_options(i, backward_compatibility_check),
global_time_limit_option,

View File

@ -43,11 +43,6 @@ Tries to build the ClickHouse documentation website. It can fail if you changed
something in the documentation. The most probable reason is that some cross-link in
the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the building log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)
## Description Check
@ -72,10 +67,6 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).
## Fast Test
Normally this is the first check that is run for a PR. It builds ClickHouse and
@ -84,8 +75,6 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
### Report Details
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -113,9 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html).
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`).
- **Compiler**: `clang-15`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

View File

@ -471,3 +471,38 @@ Result:
- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)
## JSONArrayLength
Returns the number of elements in the outermost JSON array. The function returns NULL if the input JSON string is invalid.
**Syntax**
``` sql
JSONArrayLength(json)
```
Alias: `JSON_ARRAY_LENGTH(json)`.
**Arguments**
- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
**Returned value**
- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL.
Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md).
**Example**
``` sql
SELECT
JSONArrayLength(''),
JSONArrayLength('[1,2,3]')
┌─JSONArrayLength('')─┬─JSONArrayLength('[1,2,3]')─┐
│ ᴺᵁᴸᴸ │ 3 │
└─────────────────────┴────────────────────────────┘
```

View File

@ -295,6 +295,10 @@ SELECT byteSize(NULL, 1, 0.3, '');
Sleeps for `seconds` seconds for each data block. You can specify either an integer or a floating-point number.
## sleepEachRow(seconds) {#sleepeachrowseconds}
Sleeps for `seconds` seconds for each row. You can specify either an integer or a floating-point number.
## currentDatabase() {#currentdatabase}
Returns the name of the current database.
@ -590,6 +594,27 @@ LIMIT 10
└────────────────┴─────────┘
```
## formatReadableDecimalSize(x)
Accepts a size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
Example:
``` sql
SELECT
arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
formatReadableDecimalSize(filesize_bytes) AS filesize
```
``` text
┌─filesize_bytes─┬─filesize───┐
│ 1 │ 1.00 B │
│ 1024 │ 1.02 KB │
│ 1048576 │ 1.05 MB │
│ 192851925 │ 192.85 MB │
└────────────────┴────────────┘
```
## formatReadableSize(x) {#formatreadablesizex}
Accepts a size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string.
@ -634,6 +659,92 @@ SELECT
└────────────────┴───────────────────┘
```
## formatReadableTimeDelta {#formatreadabletimedelta}
Accepts a time delta in seconds. Returns a time delta broken down into years, months, days, hours, minutes, and seconds as a string.
**Syntax**
``` sql
formatReadableTimeDelta(column[, maximum_unit])
```
**Arguments**
- `column` — A column with a numeric time delta.
- `maximum_unit` — Optional. The maximum unit to show. Acceptable values: seconds, minutes, hours, days, months, years.
Example:
``` sql
SELECT
arrayJoin([100, 12345, 432546534]) AS elapsed,
formatReadableTimeDelta(elapsed) AS time_delta
```
``` text
┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐
│ 100 │ 1 minute and 40 seconds │
│ 12345 │ 3 hours, 25 minutes and 45 seconds │
│ 432546534 │ 13 years, 8 months, 17 days, 7 hours, 48 minutes and 54 seconds │
└────────────┴─────────────────────────────────────────────────────────────────┘
```
``` sql
SELECT
arrayJoin([100, 12345, 432546534]) AS elapsed,
formatReadableTimeDelta(elapsed, 'minutes') AS time_delta
```
``` text
┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐
│ 100 │ 1 minute and 40 seconds │
│ 12345 │ 205 minutes and 45 seconds │
│ 432546534 │ 7209108 minutes and 54 seconds │
└────────────┴─────────────────────────────────────────────────────────────────┘
```
## parseTimeDelta {#parsetimedelta}
Parses a sequence of numbers followed by something resembling a time unit.
**Syntax**
```sql
parseTimeDelta(timestr)
```
**Arguments**
- `timestr` — A sequence of numbers followed by something resembling a time unit.
**Returned value**
- A floating-point number with the number of seconds.
**Example**
```sql
SELECT parseTimeDelta('11s+22min')
```
```text
┌─parseTimeDelta('11s+22min')─┐
│ 1331 │
└─────────────────────────────┘
```
```sql
SELECT parseTimeDelta('1yr2mo')
```
```text
┌─parseTimeDelta('1yr2mo')─┐
│ 36806400 │
└──────────────────────────┘
```
## least(a, b) {#leasta-b}
Returns the smallest value of a and b.
@ -657,6 +768,10 @@ SELECT
Returns the build ID generated by a compiler for the given ClickHouse server.
If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
## blockNumber {#function-blocknumber}
Returns the ordinal number of the data block in which the row is located.
## rowNumberInBlock {#function-rownumberinblock}
Returns the ordinal number of the row in the data block. Numbering starts from 0 for each data block.
@ -679,6 +794,7 @@ neighbor(column, offset[, default_value])
:::danger "Warning"
The function can access the value of a column in a neighboring row only within the data block currently being processed.
:::
The order of rows used when evaluating `neighbor` can differ from the order of rows returned to the user.
To avoid this, you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery.
@ -788,6 +904,7 @@ FROM numbers(16)
:::danger "Warning"
The function can take the value of the previous row only within the currently processed data block.
:::
The result of the function depends on the affected data blocks and the order of data in the block.
@ -869,7 +986,7 @@ WHERE diff != 1
:::danger "Warning"
Events must be sorted in ascending order of start time. If this requirement is violated, the function throws an exception.
Each data block is processed independently. If events from different data blocks overlap in time, they cannot be processed correctly.
:::
**Syntax**
``` sql
@ -1560,6 +1677,7 @@ FROM numbers(10);
:::danger "Warning"
The function resets its state for each new data block.
:::
**Syntax**

View File

@ -17,6 +17,8 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
# Since ClickHouse is systemd-aware, the default 1m30s timeout may not be enough
TimeoutStartSec=infinity
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid

View File

@ -0,0 +1,62 @@
#include "AutoFinalOnQueryPass.h"
#include <Analyzer/TableNode.h>
#include <Analyzer/TableExpressionModifiers.h>
#include <Storages/IStorage.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
namespace DB
{
namespace
{
class AutoFinalOnQueryPassVisitor : public InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
{
if (auto * table_node = node->as<TableNode>())
{
if (autoFinalOnQuery(*table_node, table_node->getStorage(), getContext()))
{
auto modifier = TableExpressionModifiers(true, std::nullopt, std::nullopt);
table_node->setTableExpressionModifiers(modifier);
}
}
}
private:
static bool autoFinalOnQuery(TableNode & table_node, StoragePtr storage, ContextPtr context)
{
bool is_auto_final_setting_on = context->getSettingsRef().final;
bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote();
bool is_query_already_final = table_node.hasTableExpressionModifiers() ? table_node.getTableExpressionModifiers().has_value() : false;
return is_auto_final_setting_on && !is_query_already_final && is_final_supported;
}
};
}
String AutoFinalOnQueryPass::getName()
{
return "AutoFinalOnQueryPass";
}
String AutoFinalOnQueryPass::getDescription()
{
return "Automatically applies final modifier to queries if it is supported and if user level final setting is set.";
}
void AutoFinalOnQueryPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
auto visitor = AutoFinalOnQueryPassVisitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
#include <Storages/IStorage_fwd.h>
#include <Analyzer/TableNode.h>
namespace DB
{
class AutoFinalOnQueryPass final : public IQueryTreePass
{
public:
String getName() override;
String getDescription() override;
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -36,9 +36,11 @@
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
#include <Analyzer/Passes/GroupingFunctionsResolvePass.h>
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
namespace DB
{
@ -263,6 +265,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
}
}

View File

@ -40,7 +40,7 @@ ThreadStatus & CurrentThread::get()
ProfileEvents::Counters & CurrentThread::getProfileEvents()
{
return current_thread ? current_thread->performance_counters : ProfileEvents::global_counters;
return current_thread ? *current_thread->current_performance_counters : ProfileEvents::global_counters;
}
void CurrentThread::updateProgressIn(const Progress & value)

View File

@ -92,7 +92,7 @@ public:
static void detachQueryIfNotDetached();
/// Initializes query with current thread as master thread in constructor, and detaches it in destructor
struct QueryScope
struct QueryScope : private boost::noncopyable
{
explicit QueryScope(ContextMutablePtr query_context);
explicit QueryScope(ContextPtr query_context);

View File

@ -0,0 +1,32 @@
#include <Common/ProfileEventsScope.h>
namespace DB
{
ProfileEventsScope::ProfileEventsScope()
: performance_counters_holder(std::make_unique<ProfileEvents::Counters>())
, performance_counters_scope(performance_counters_holder.get())
, previous_counters_scope(CurrentThread::get().attachProfileCountersScope(performance_counters_scope))
{
}
ProfileEventsScope::ProfileEventsScope(ProfileEvents::Counters * performance_counters_scope_)
: performance_counters_scope(performance_counters_scope_)
, previous_counters_scope(CurrentThread::get().attachProfileCountersScope(performance_counters_scope))
{
}
std::shared_ptr<ProfileEvents::Counters::Snapshot> ProfileEventsScope::getSnapshot()
{
return std::make_shared<ProfileEvents::Counters::Snapshot>(performance_counters_scope->getPartiallyAtomicSnapshot());
}
ProfileEventsScope::~ProfileEventsScope()
{
/// Restore previous performance counters
CurrentThread::get().attachProfileCountersScope(previous_counters_scope);
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>
namespace DB
{
/// Use specific performance counters for current thread in the current scope.
class ProfileEventsScope : private boost::noncopyable
{
public:
/// Counters are owned by this object.
ProfileEventsScope();
/// Shared counters are stored outside.
/// Useful when we need to calculate metrics for a scope that is entered several times.
explicit ProfileEventsScope(ProfileEvents::Counters * performance_counters_scope_);
std::shared_ptr<ProfileEvents::Counters::Snapshot> getSnapshot();
~ProfileEventsScope();
private:
/// If set, then performance_counters_scope is owned by this object.
/// Otherwise, counters are passed to the constructor from outside.
std::unique_ptr<ProfileEvents::Counters> performance_counters_holder;
ProfileEvents::Counters * performance_counters_scope;
ProfileEvents::Counters * previous_counters_scope;
};
}
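
For orientation, here is a minimal usage sketch of this RAII helper, mirroring the call sites added elsewhere in this commit (e.g. MergePlainMergeTreeTask); the surrounding task code is illustrative, not part of the diff:

```cpp
#include <Common/ProfileEventsScope.h>

/// Inside a background task that owns `ProfileEvents::Counters profile_counters;`
/// (as MergePlainMergeTreeTask does in this commit):
{
    /// While this scope is alive, events incremented on this thread are
    /// accumulated into profile_counters instead of the thread's default counters.
    ProfileEventsScope profile_events_scope(&profile_counters);

    /// ... do the merge / mutation / move work here ...
}   /// Destructor restores the previously attached counters.

/// Later, take a snapshot of what was accumulated and attach it to a part_log record:
auto profile_counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(
    profile_counters.getPartiallyAtomicSnapshot());
```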

View File

@ -124,6 +124,10 @@ public:
/// TODO: merge them into common entity
ProfileEvents::Counters performance_counters{VariableContext::Thread};
/// Points to performance_counters by default.
/// Could be changed to point to another object to calculate performance counters for some narrow scope.
ProfileEvents::Counters * current_performance_counters{&performance_counters};
MemoryTracker memory_tracker{VariableContext::Thread};
/// Small amount of untracked memory (per thread atomic-less counter)
@ -139,6 +143,7 @@ public:
Deleter deleter;
protected:
/// Group of threads, to which this thread attached
ThreadGroupStatusPtr thread_group;
std::atomic<int> thread_state{ThreadState::DetachedFromQuery};
@ -244,6 +249,10 @@ public:
/// Attaches slave thread to existing thread group
void attachQuery(const ThreadGroupStatusPtr & thread_group_, bool check_detached = true);
/// Returns a pointer to the current profile counters so that they can be restored later.
/// Note: a subsequent call with a new scope detaches the previous scope.
ProfileEvents::Counters * attachProfileCountersScope(ProfileEvents::Counters * performance_counters_scope);
InternalTextLogsQueuePtr getInternalTextLogsQueue() const
{
return thread_state == Died ? nullptr : logs_queue_ptr.lock();

View File

@ -114,6 +114,8 @@ class IColumn;
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \
M(Bool, move_all_conditions_to_prewhere, false, "Move all viable conditions from WHERE to PREWHERE", 0) \
M(Bool, enable_multiple_prewhere_read_steps, false, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
\
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
@ -273,6 +275,8 @@ class IColumn;
M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
\
M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \
\
/** Settings for testing hedged requests */ \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \

View File

@ -0,0 +1,110 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include "config.h"
#if USE_SIMDJSON
# include <Common/JSONParsers/SimdJSONParser.h>
#elif USE_RAPIDJSON
# include <Common/JSONParsers/RapidJSONParser.h>
#else
# include <Common/JSONParsers/DummyJSONParser.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace
{
/// JSONArrayLength(json)
class FunctionJSONArrayLength : public IFunction
{
public:
static constexpr auto name = "JSONArrayLength";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionJSONArrayLength>(); }
String getName() const override { return name; }
bool isVariadic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto args = FunctionArgumentDescriptors{
{"json", &isString<IDataType>, nullptr, "String"},
};
validateFunctionArgumentTypes(*this, arguments, args);
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
const ColumnString * col = typeid_cast<const ColumnString *>(column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be string", getName());
auto null_map = ColumnUInt8::create();
auto data = ColumnUInt64::create();
null_map->reserve(input_rows_count);
data->reserve(input_rows_count);
#if USE_SIMDJSON
SimdJSONParser parser;
SimdJSONParser::Element element;
#elif USE_RAPIDJSON
RapidJSONParser parser;
RapidJSONParser::Element element;
#else
DummyJSONParser parser;
DummyJSONParser::Element element;
#endif
for (size_t i = 0; i < input_rows_count; ++i)
{
auto str_ref = col->getDataAt(i);
std::string_view str_view(str_ref.data, str_ref.size);
bool ok = parser.parse(std::move(str_view), element);
if (!ok || !element.isArray())
{
null_map->insertValue(1);
data->insertDefault();
}
else
{
auto array = element.getArray();
null_map->insertValue(0);
data->insertValue(array.size());
}
}
return ColumnNullable::create(std::move(data), std::move(null_map));
}
};
}
REGISTER_FUNCTION(JSONArrayLength)
{
factory.registerFunction<FunctionJSONArrayLength>(Documentation{
"Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."});
/// For Spark compatibility.
factory.registerAlias("JSON_ARRAY_LENGTH", "JSONArrayLength", FunctionFactory::CaseInsensitive);
}
}

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/IColumn.h>
#include <Common/Exception.h>
@ -393,9 +394,15 @@ public:
replicated_column_function->appendArguments(arrays);
auto lambda_result = replicated_column_function->reduce();
/// Convert LowCardinality(T) -> T and Const(LowCardinality(T)) -> Const(T),
/// because we removed LowCardinality from return type of lambda expression.
if (lambda_result.column->lowCardinality())
lambda_result.column = lambda_result.column->convertToFullColumnIfLowCardinality();
if (const auto * const_column = checkAndGetColumnConst<ColumnLowCardinality>(lambda_result.column.get()))
lambda_result.column = const_column->removeLowCardinality();
if (Impl::needBoolean())
{
/// If result column is Nothing or Nullable(Nothing), just create const UInt8 column with 0 value.

View File

@ -1017,6 +1017,9 @@ std::string ActionsDAG::dumpDAG() const
out << ' ' << map[node];
out << '\n';
out << "Project input: " << project_input << '\n';
out << "Projected output: " << projected_output << '\n';
return out.str();
}

View File

@ -584,6 +584,12 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
{
/// Do not execute the function if its result is already known.
res_column.column = action.node->column->cloneResized(num_rows);
/// But we still need to remove unused arguments.
for (const auto & argument : action.arguments)
{
if (!argument.needed_later)
columns[argument.pos] = {};
}
break;
}

View File

@ -30,7 +30,7 @@ void IInterpreter::extendQueryLogElem(
extendQueryLogElemImpl(elem, ast, context);
}
void IInterpreter::checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context)
void IInterpreter::checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context, bool is_readonly_query)
{
if (!context->getCurrentTransaction())
return;
@ -41,6 +41,13 @@ void IInterpreter::checkStorageSupportsTransactionsIfNeeded(const StoragePtr & s
if (context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Storage {} (table {}) does not support transactions",
storage->getName(), storage->getStorageID().getNameForLogs());
/// Do not allow transactions with ReplicatedMergeTree anyway (unless it's a read-only SELECT query),
/// because it may try to process the transaction at the MergeTreeData level,
/// but then fail with a logical error at the StorageReplicatedMergeTree level.
if (!is_readonly_query && storage->supportsReplication())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ReplicatedMergeTree (table {}) does not support transactions",
storage->getStorageID().getNameForLogs());
}
}

View File

@ -39,7 +39,7 @@ public:
virtual bool supportsTransactions() const { return false; }
/// Helper function for some Interpreters.
static void checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context);
static void checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context, bool is_readonly_query = false);
virtual ~IInterpreter() = default;
};

View File

@ -120,10 +120,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context_)
: DatabaseCatalog::instance().getDatabaseAndTable(table_id, context_);
checkStorageSupportsTransactionsIfNeeded(table, context_);
if (database && table)
{
checkStorageSupportsTransactionsIfNeeded(table, context_);
auto & ast_drop_query = query.as<ASTDropQuery &>();
if (ast_drop_query.is_view && !table->isView())

View File

@ -436,7 +436,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
{
if (storage)
checkStorageSupportsTransactionsIfNeeded(storage, context);
checkStorageSupportsTransactionsIfNeeded(storage, context, /* is_readonly_query */ true);
for (const auto & table : joined_tables.tablesWithColumns())
{
if (table.table.table.empty())
@ -444,7 +444,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
auto maybe_storage = DatabaseCatalog::instance().tryGetTable({table.table.database, table.table.table}, context);
if (!maybe_storage)
continue;
checkStorageSupportsTransactionsIfNeeded(storage, context);
checkStorageSupportsTransactionsIfNeeded(storage, context, /* is_readonly_query */ true);
}
}
@ -509,6 +509,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
query_info.additional_filter_ast = parseAdditionalFilterConditionForTable(
settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context);
if (autoFinalOnQuery(query))
{
query.setFinal();
}
auto analyze = [&] (bool try_move_to_prewhere)
{
/// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it.
@ -3020,6 +3025,15 @@ void InterpreterSelectQuery::ignoreWithTotals()
getSelectQuery().group_by_with_totals = false;
}
bool InterpreterSelectQuery::autoFinalOnQuery(ASTSelectQuery & query)
{
// query.tables() is required because not all queries have tables; the source could be a table function instead.
bool is_auto_final_setting_on = context->getSettingsRef().final;
bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote() && query.tables();
bool is_query_already_final = query.final();
return is_auto_final_setting_on && !is_query_already_final && is_final_supported;
}
void InterpreterSelectQuery::initSettings()
{

View File

@ -184,6 +184,7 @@ private:
void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct);
void executeExtremes(QueryPlan & query_plan);
void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan);
bool autoFinalOnQuery(ASTSelectQuery & select_query);
enum class Modificator
{

View File

@ -11,6 +11,9 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/Context.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Common/ProfileEvents.h>
#include <DataTypes/DataTypeMap.h>
#include <Common/CurrentThread.h>
@ -121,6 +124,17 @@ NamesAndTypesList PartLogElement::getNamesAndTypes()
/// Is there an error during the execution or commit
{"error", std::make_shared<DataTypeUInt16>()},
{"exception", std::make_shared<DataTypeString>()},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
};
}
NamesAndAliases PartLogElement::getNamesAndAliases()
{
return
{
{"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
{"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"},
};
}
@ -163,18 +177,20 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(error);
columns[i++]->insert(exception);
if (profile_counters)
{
auto * column = columns[i++].get();
ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
}
else
{
columns[i++]->insertDefault();
}
}
bool PartLog::addNewPart(
ContextPtr current_context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
{
return addNewParts(current_context, {part}, elapsed_ns, execution_status);
}
bool PartLog::addNewParts(
ContextPtr current_context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
ContextPtr current_context, const PartLog::PartLogEntries & parts, const ExecutionStatus & execution_status)
{
if (parts.empty())
return true;
@ -183,15 +199,17 @@ bool PartLog::addNewParts(
try
{
auto table_id = parts.front()->storage.getStorageID();
auto table_id = parts.front().part->storage.getStorageID();
part_log = current_context->getPartLog(table_id.database_name); // assume parts belong to the same table
if (!part_log)
return false;
auto query_id = CurrentThread::getQueryId();
for (const auto & part : parts)
for (const auto & part_log_entry : parts)
{
const auto & part = part_log_entry.part;
PartLogElement elem;
if (!query_id.empty())
@ -204,7 +222,7 @@ bool PartLog::addNewParts(
const auto time_now = std::chrono::system_clock::now();
elem.event_time = timeInSeconds(time_now);
elem.event_time_microseconds = timeInMicroseconds(time_now);
elem.duration_ms = elapsed_ns / 1000000;
elem.duration_ms = part_log_entry.elapsed_ns / 1000000;
elem.database_name = table_id.database_name;
elem.table_name = table_id.table_name;
@ -221,6 +239,8 @@ bool PartLog::addNewParts(
elem.error = static_cast<UInt16>(execution_status.code);
elem.exception = execution_status.message;
elem.profile_counters = part_log_entry.profile_counters;
part_log->add(elem);
}
}
@ -233,4 +253,21 @@ bool PartLog::addNewParts(
return true;
}
bool PartLog::addNewPart(ContextPtr context, const PartLog::PartLogEntry & part, const ExecutionStatus & execution_status)
{
return addNewParts(context, {part}, execution_status);
}
PartLog::PartLogEntries PartLog::createPartLogEntries(const MutableDataPartsVector & parts, UInt64 elapsed_ns, ProfileCountersSnapshotPtr profile_counters)
{
PartLogEntries part_log_entries;
part_log_entries.reserve(parts.size());
for (const auto & part : parts)
part_log_entries.emplace_back(part, elapsed_ns, profile_counters);
return part_log_entries;
}
}

View File

@ -8,6 +8,10 @@
#include <Storages/MergeTree/MergeType.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
namespace ProfileEvents
{
class Counters;
}
namespace DB
{
@ -81,13 +85,15 @@ struct PartLogElement
UInt16 error = 0;
String exception;
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters;
static std::string name() { return "PartLog"; }
static MergeReasonType getMergeReasonType(MergeType merge_type);
static PartMergeAlgorithm getMergeAlgorithm(MergeAlgorithm merge_algorithm_);
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
static NamesAndAliases getNamesAndAliases();
void appendToBlock(MutableColumns & columns) const;
static const char * getCustomColumnList() { return nullptr; }
};
@ -103,11 +109,37 @@ class PartLog : public SystemLog<PartLogElement>
using MutableDataPartPtr = std::shared_ptr<IMergeTreeDataPart>;
using MutableDataPartsVector = std::vector<MutableDataPartPtr>;
using ProfileCountersSnapshotPtr = std::shared_ptr<ProfileEvents::Counters::Snapshot>;
public:
struct PartLogEntry
{
std::shared_ptr<IMergeTreeDataPart> part;
ProfileCountersSnapshotPtr profile_counters;
UInt64 elapsed_ns;
PartLogEntry(std::shared_ptr<IMergeTreeDataPart> part_, UInt64 elapsed_ns_)
: part(std::move(part_)), elapsed_ns(elapsed_ns_)
{
}
PartLogEntry(std::shared_ptr<IMergeTreeDataPart> part_, UInt64 elapsed_ns_, ProfileCountersSnapshotPtr profile_counters_)
: part(std::move(part_))
, profile_counters(std::move(profile_counters_))
, elapsed_ns(elapsed_ns_)
{
}
};
using PartLogEntries = std::vector<PartLogEntry>;
static PartLogEntries createPartLogEntries(const MutableDataPartsVector & parts, UInt64 elapsed_ns, ProfileCountersSnapshotPtr profile_counters = {});
/// Add a record about creation of new part.
static bool addNewPart(ContextPtr context, const MutableDataPartPtr & part, UInt64 elapsed_ns,
static bool addNewPart(ContextPtr context, const PartLogEntry & part,
const ExecutionStatus & execution_status = {});
static bool addNewParts(ContextPtr context, const MutableDataPartsVector & parts, UInt64 elapsed_ns,
static bool addNewParts(ContextPtr context, const PartLogEntries & parts,
const ExecutionStatus & execution_status = {});
};
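
As a rough sketch of how the new entry-based API is meant to be called (the `parts`, `watch`, and `profile_events_scope` variables are placeholders for what the real call sites in MergeTreeData/StorageMergeTree have at hand):

```cpp
/// Sketch only: `context` is a ContextPtr, `parts` is a MutableDataPartsVector,
/// `watch` is a Stopwatch and `profile_events_scope` is a ProfileEventsScope.
auto profile_counters_snapshot = profile_events_scope.getSnapshot();

/// Wrap every part together with the elapsed time and the counters snapshot...
auto part_log_entries = PartLog::createPartLogEntries(parts, watch.elapsed(), profile_counters_snapshot);

/// ...and write one part_log row per entry (the parts are assumed to belong to the same table).
PartLog::addNewParts(context, part_log_entries);
```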

View File

@ -453,7 +453,7 @@ std::shared_ptr<SessionLog> Session::getSessionLog() const
ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const
{
if (!user_id && getClientInfo().interface != ClientInfo::Interface::TCP_INTERSERVER)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication");
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query context must be created after authentication");
/// We can create a query context either from a session context or from a global context.
bool from_session_context = static_cast<bool>(session_context);

View File

@ -161,6 +161,23 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool
setupState(thread_group_);
}
ProfileEvents::Counters * ThreadStatus::attachProfileCountersScope(ProfileEvents::Counters * performance_counters_scope)
{
ProfileEvents::Counters * prev_counters = current_performance_counters;
if (current_performance_counters == performance_counters_scope)
/// Allow attaching the same scope multiple times
return prev_counters;
/// Avoid cycles when exiting local scope and attaching back to current thread counters
if (performance_counters_scope != &performance_counters)
performance_counters_scope->setParent(&performance_counters);
current_performance_counters = performance_counters_scope;
return prev_counters;
}
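
The returned pointer is the previously attached scope, so a caller can restore it when the narrow scope ends; this is the contract the new ProfileEventsScope helper wraps. A minimal sketch with an illustrative local counters object:

```cpp
/// Sketch of the attach/restore pattern (what ProfileEventsScope's constructor and destructor do).
ProfileEvents::Counters local_counters{VariableContext::Thread};

/// From now on, this thread accumulates profile events into local_counters.
ProfileEvents::Counters * previous = CurrentThread::get().attachProfileCountersScope(&local_counters);

/// ... run the narrowly scoped work to be measured ...

/// Restore whatever scope was attached before.
CurrentThread::get().attachProfileCountersScope(previous);
```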
void ThreadStatus::initPerformanceCounters()
{
performance_counters_finalized = false;

View File

@ -113,6 +113,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings(
.read_in_order = query_info.input_order_info != nullptr,
.use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree
&& (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1),
.enable_multiple_prewhere_read_steps = settings.enable_multiple_prewhere_read_steps,
};
}
@ -265,6 +266,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
extension,
parts_with_range,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
min_marks_for_concurrent_read
@ -348,9 +351,9 @@ Pipe ReadFromMergeTree::readFromPool(
if ((all_parts_are_remote
&& settings.allow_prefetched_read_pool_for_remote_filesystem
&& MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method))
|| (!all_parts_are_local
|| (all_parts_are_local
&& settings.allow_prefetched_read_pool_for_local_filesystem
&& MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.remote_fs_method)))
&& MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.local_fs_method)))
{
pool = std::make_shared<MergeTreePrefetchedReadPool>(
max_streams,
@ -359,6 +362,7 @@ Pipe ReadFromMergeTree::readFromPool(
std::move(parts_with_range),
storage_snapshot,
prewhere_info,
actions_settings,
required_columns,
virt_column_names,
settings.preferred_block_size_bytes,
@ -377,6 +381,8 @@ Pipe ReadFromMergeTree::readFromPool(
std::move(parts_with_range),
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
context,

View File

@ -554,6 +554,11 @@ void HTTPHandler::processQuery(
std::string session_check = params.get("session_check", "");
session->makeSessionContext(session_id, session_timeout, session_check == "1");
}
else
{
/// We should create it even if we don't have a session_id
session->makeSessionContext();
}
auto client_info = session->getClientInfo();
auto context = session->makeQueryContext(std::move(client_info));

View File

@ -2,6 +2,7 @@
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
#include <Common/ProfileEventsScope.h>
#include <Storages/StorageReplicatedMergeTree.h>
namespace ProfileEvents
@ -290,9 +291,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
return {true, true, [this, stopwatch = *stopwatch_ptr] (const ExecutionStatus & execution_status)
{
auto profile_counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(profile_counters.getPartiallyAtomicSnapshot());
storage.writePartLog(
PartLogElement::MERGE_PARTS, execution_status, stopwatch.elapsed(),
entry.new_part_name, part, parts, merge_mutate_entry.get());
entry.new_part_name, part, parts, merge_mutate_entry.get(), std::move(profile_counters_snapshot));
}};
}

View File

@ -3,6 +3,7 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Common/ProfileEventsScope.h>
namespace DB
{
@ -27,6 +28,9 @@ void MergePlainMergeTreeTask::onCompleted()
bool MergePlainMergeTreeTask::executeStep()
{
/// Metrics will be saved in the local profile_counters.
ProfileEventsScope profile_events_scope(&profile_counters);
/// Make out memory tracker a parent of current thread memory tracker
MemoryTrackerThreadSwitcherPtr switcher;
if (merge_list_entry)
@ -85,6 +89,7 @@ void MergePlainMergeTreeTask::prepare()
write_part_log = [this] (const ExecutionStatus & execution_status)
{
auto profile_counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(profile_counters.getPartiallyAtomicSnapshot());
merge_task.reset();
storage.writePartLog(
PartLogElement::MERGE_PARTS,
@ -93,7 +98,8 @@ void MergePlainMergeTreeTask::prepare()
future_part->name,
new_part,
future_part->parts,
merge_list_entry.get());
merge_list_entry.get(),
std::move(profile_counters_snapshot));
};
merge_task = storage.merger_mutator.mergePartsToTemporaryPart(

View File

@ -6,6 +6,7 @@
#include <Storages/MergeTree/MergeMutateSelectedEntry.h>
#include <Interpreters/MergeTreeTransactionHolder.h>
namespace DB
{
@ -48,7 +49,6 @@ public:
}
private:
void prepare();
void finish();
@ -85,6 +85,8 @@ private:
MergeTreeTransactionHolder txn_holder;
MergeTreeTransactionPtr txn;
ProfileEvents::Counters profile_counters;
};

View File

@ -47,7 +47,7 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm(
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings_,
UInt64 max_block_size_rows_,
UInt64 preferred_block_size_bytes_,
UInt64 preferred_max_column_in_block_size_bytes_,
@ -57,7 +57,8 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm(
: storage(storage_)
, storage_snapshot(storage_snapshot_)
, prewhere_info(prewhere_info_)
, prewhere_actions(getPrewhereActions(prewhere_info, actions_settings))
, actions_settings(actions_settings_)
, prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps))
, max_block_size_rows(max_block_size_rows_)
, preferred_block_size_bytes(preferred_block_size_bytes_)
, preferred_max_column_in_block_size_bytes(preferred_max_column_in_block_size_bytes_)
@ -81,8 +82,9 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm(
LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? prewhere_actions->dump() : std::string("<nullptr>")));
}
bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere);
std::unique_ptr<PrewhereExprInfo> IMergeTreeSelectAlgorithm::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings)
std::unique_ptr<PrewhereExprInfo> IMergeTreeSelectAlgorithm::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps)
{
std::unique_ptr<PrewhereExprInfo> prewhere_actions;
if (prewhere_info)
@ -102,15 +104,19 @@ std::unique_ptr<PrewhereExprInfo> IMergeTreeSelectAlgorithm::getPrewhereActions(
prewhere_actions->steps.emplace_back(std::move(row_level_filter_step));
}
PrewhereExprStep prewhere_step
if (!enable_multiple_prewhere_read_steps ||
!tryBuildPrewhereSteps(prewhere_info, actions_settings, *prewhere_actions))
{
.actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings),
.column_name = prewhere_info->prewhere_column_name,
.remove_column = prewhere_info->remove_prewhere_column,
.need_filter = prewhere_info->need_filter
};
PrewhereExprStep prewhere_step
{
.actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings),
.column_name = prewhere_info->prewhere_column_name,
.remove_column = prewhere_info->remove_prewhere_column,
.need_filter = prewhere_info->need_filter
};
prewhere_actions->steps.emplace_back(std::move(prewhere_step));
prewhere_actions->steps.emplace_back(std::move(prewhere_step));
}
}
return prewhere_actions;

View File

@ -43,7 +43,7 @@ public:
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings,
UInt64 max_block_size_rows_,
UInt64 preferred_block_size_bytes_,
UInt64 preferred_max_column_in_block_size_bytes_,
@ -71,6 +71,8 @@ public:
virtual std::string getName() const = 0;
static std::unique_ptr<PrewhereExprInfo> getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps);
protected:
/// This struct allows returning a block with no columns but with a non-zero number of rows, similar to Chunk
struct BlockAndProgress
@ -101,8 +103,7 @@ protected:
static void
injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns);
static std::unique_ptr<PrewhereExprInfo> getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings);
protected:
static void initializeRangeReadersImpl(
MergeTreeRangeReader & range_reader,
std::deque<MergeTreeRangeReader> & pre_range_readers,
@ -138,6 +139,7 @@ protected:
/// This step is added when the part has lightweight delete mask
const PrewhereExprStep lightweight_delete_filter_step { nullptr, LightweightDeleteDescription::FILTER_COLUMN.name, true, true };
PrewhereInfoPtr prewhere_info;
ExpressionActionsSettings actions_settings;
std::unique_ptr<PrewhereExprInfo> prewhere_actions;
UInt64 max_block_size_rows;

View File

@ -5,6 +5,7 @@
#include <Core/NamesAndTypes.h>
#include <Common/checkStackSize.h>
#include <Common/typeid_cast.h>
#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
#include <Columns/ColumnConst.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
@ -299,21 +300,22 @@ MergeTreeReadTaskColumns getReadTaskColumns(
const Names & required_columns,
const Names & system_columns,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings,
const MergeTreeReaderSettings & reader_settings,
bool with_subcolumns)
{
Names column_names = required_columns;
Names pre_column_names;
Names column_to_read_after_prewhere = required_columns;
/// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part
for (const auto & name : system_columns)
{
if (data_part_info_for_reader.getColumns().contains(name))
column_names.push_back(name);
column_to_read_after_prewhere.push_back(name);
}
/// inject columns required for defaults evaluation
/// Inject columns required for defaults evaluation
injectRequiredColumns(
data_part_info_for_reader, storage_snapshot, with_subcolumns, column_names);
data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere);
MergeTreeReadTaskColumns result;
auto options = GetColumnsOptions(GetColumnsOptions::All)
@ -324,45 +326,41 @@ MergeTreeReadTaskColumns getReadTaskColumns(
if (prewhere_info)
{
NameSet pre_name_set;
auto prewhere_actions = IMergeTreeSelectAlgorithm::getPrewhereActions(
prewhere_info, actions_settings, reader_settings.enable_multiple_prewhere_read_steps);
/// Add column reading steps:
/// 1. Columns for row level filter
if (prewhere_info->row_level_filter)
NameSet columns_from_previous_steps;
for (const auto & step : prewhere_actions->steps)
{
Names row_filter_column_names = prewhere_info->row_level_filter->getRequiredColumnsNames();
Names step_column_names = step.actions->getActionsDAG().getRequiredColumnsNames();
injectRequiredColumns(
data_part_info_for_reader, storage_snapshot, with_subcolumns, row_filter_column_names);
result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, row_filter_column_names));
pre_name_set.insert(row_filter_column_names.begin(), row_filter_column_names.end());
}
/// 2. Columns for prewhere
Names all_pre_column_names = prewhere_info->prewhere_actions->getRequiredColumnsNames();
injectRequiredColumns(
data_part_info_for_reader, storage_snapshot, with_subcolumns, all_pre_column_names);
for (const auto & name : all_pre_column_names)
{
if (pre_name_set.contains(name))
continue;
pre_column_names.push_back(name);
pre_name_set.insert(name);
data_part_info_for_reader, storage_snapshot, with_subcolumns, step_column_names);
Names columns_to_read_in_step;
for (const auto & name : step_column_names)
{
if (columns_from_previous_steps.contains(name))
continue;
columns_to_read_in_step.push_back(name);
columns_from_previous_steps.insert(name);
}
result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, columns_to_read_in_step));
}
/// Remove columns read in prewhere from the list of columns to read
Names post_column_names;
for (const auto & name : column_names)
if (!pre_name_set.contains(name))
for (const auto & name : column_to_read_after_prewhere)
if (!columns_from_previous_steps.contains(name))
post_column_names.push_back(name);
column_names = post_column_names;
column_to_read_after_prewhere = std::move(post_column_names);
}
result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names));
/// 3. Rest of the requested columns
result.columns = storage_snapshot->getColumnsByNames(options, column_names);
/// Rest of the requested columns
result.columns = storage_snapshot->getColumnsByNames(options, column_to_read_after_prewhere);
return result;
}

View File

@ -13,6 +13,7 @@ namespace DB
class MergeTreeData;
struct MergeTreeReadTask;
struct MergeTreeReaderSettings;
struct MergeTreeBlockSizePredictor;
class IMergeTreeDataPartInfoForReader;
@ -102,6 +103,8 @@ MergeTreeReadTaskColumns getReadTaskColumns(
const Names & required_columns,
const Names & system_columns,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings,
const MergeTreeReaderSettings & reader_settings,
bool with_subcolumns);
struct MergeTreeBlockSizePredictor

View File

@ -10,6 +10,7 @@
#include <Common/escapeForFileName.h>
#include <Common/Increment.h>
#include <Common/noexcept_scope.h>
#include <Common/ProfileEventsScope.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/SimpleIncrement.h>
@ -6430,17 +6431,21 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
if (query_info.additional_filter_ast)
return std::nullopt;
auto query_ptr = query_info.original_query;
auto query_ptr = query_info.query;
auto original_query_ptr = query_info.original_query;
auto * select_query = query_ptr->as<ASTSelectQuery>();
if (!select_query)
auto * original_select_query = original_query_ptr->as<ASTSelectQuery>();
if (!original_select_query || !select_query)
return std::nullopt;
// Currently projections don't support final yet.
if (select_query->final())
if (select_query->final() || original_select_query->final())
return std::nullopt;
// Currently projections don't support sample yet.
if (select_query->sampleSize())
if (original_select_query->sampleSize())
return std::nullopt;
// Currently projections don't support deduplication when moving parts between shards.
@ -6448,24 +6453,24 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
return std::nullopt;
// Currently projections don't support ARRAY JOIN yet.
if (select_query->arrayJoinExpressionList().first)
if (original_select_query->arrayJoinExpressionList().first)
return std::nullopt;
// In order to properly analyze joins, aliases should be recognized. However, aliases get lost during projection analysis.
// Let's disable projection if there are any JOIN clauses.
// TODO: We need a better identifier resolution mechanism for projection analysis.
if (select_query->hasJoin())
if (original_select_query->hasJoin())
return std::nullopt;
// INTERPOLATE expressions may include aliases, so aliases should be preserved
if (select_query->interpolate() && !select_query->interpolate()->children.empty())
if (original_select_query->interpolate() && !original_select_query->interpolate()->children.empty())
return std::nullopt;
// Projections don't support grouping sets yet.
if (select_query->group_by_with_grouping_sets
|| select_query->group_by_with_totals
|| select_query->group_by_with_rollup
|| select_query->group_by_with_cube)
if (original_select_query->group_by_with_grouping_sets
|| original_select_query->group_by_with_totals
|| original_select_query->group_by_with_rollup
|| original_select_query->group_by_with_cube)
return std::nullopt;
auto query_options = SelectQueryOptions(
@ -6475,7 +6480,7 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
).ignoreProjections().ignoreAlias();
InterpreterSelectQuery select(
query_ptr,
original_query_ptr,
query_context,
query_options,
query_info.prepared_sets);
@ -7320,7 +7325,8 @@ void MergeTreeData::writePartLog(
const String & new_part_name,
const DataPartPtr & result_part,
const DataPartsVector & source_parts,
const MergeListEntry * merge_entry)
const MergeListEntry * merge_entry,
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters)
try
{
auto table_id = getStorageID();
@ -7382,6 +7388,15 @@ try
part_log_elem.peak_memory_usage = (*merge_entry)->memory_tracker.getPeak();
}
if (profile_counters)
{
part_log_elem.profile_counters = profile_counters;
}
else
{
LOG_WARNING(log, "Profile counters are not set");
}
part_log->add(part_log_elem);
}
catch (...)
@ -7517,6 +7532,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge
{
Stopwatch stopwatch;
MutableDataPartPtr cloned_part;
ProfileEventsScope profile_events_scope;
auto write_part_log = [&](const ExecutionStatus & execution_status)
{
@ -7527,7 +7543,8 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge
moving_part.part->name,
cloned_part,
{moving_part.part},
nullptr);
nullptr,
profile_events_scope.getSnapshot());
};
// Register in global moves list (StorageSystemMoves)

View File

@ -1301,7 +1301,8 @@ protected:
const String & new_part_name,
const DataPartPtr & result_part,
const DataPartsVector & source_parts,
const MergeListEntry * merge_entry);
const MergeListEntry * merge_entry,
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters);
/// If part is assigned to merge or mutation (possibly replicated)
/// Should be overridden by children, because they can have different

View File

@ -29,6 +29,8 @@ struct MergeTreeReaderSettings
bool apply_deleted_mask = true;
/// Put reading task in a common I/O pool, return Async state on prepare()
bool use_asynchronous_read_from_pool = false;
/// If PREWHERE has multiple conditions combined with AND, execute them in separate read/filtering steps.
bool enable_multiple_prewhere_read_steps = false;
};
struct MergeTreeWriterSettings

View File

@ -31,6 +31,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
RangesInDataParts && parts_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
size_t preferred_block_size_bytes_,
@ -44,7 +45,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
, header(storage_snapshot_->getSampleBlockForColumns(column_names_))
, mark_cache(context_->getGlobalContext()->getMarkCache().get())
, uncompressed_cache(use_uncompressed_cache_ ? context_->getGlobalContext()->getUncompressedCache().get() : nullptr)
, reader_settings(reader_settings_)
, profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); })
, index_granularity_bytes(storage_settings_.index_granularity_bytes)
, fixed_index_granularity(storage_settings_.index_granularity)
@ -52,6 +52,8 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
, column_names(column_names_)
, virtual_column_names(virtual_column_names_)
, prewhere_info(prewhere_info_)
, actions_settings(actions_settings_)
, reader_settings(reader_settings_)
, is_remote_read(is_remote_read_)
, prefetch_threadpool(getContext()->getPrefetchThreadpool())
{
@ -321,6 +323,8 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf
column_names,
virtual_column_names,
prewhere_info,
actions_settings,
reader_settings,
/* with_subcolumns */true);
part_info->size_predictor = !predict_block_size_bytes

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/ThreadPool.h>
#include <Interpreters/ExpressionActionsSettings.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreeIOSettings.h>
#include <Core/BackgroundSchedulePool.h>
@ -25,6 +26,7 @@ public:
RangesInDataParts && parts_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
size_t preferred_block_size_bytes_,
@ -78,7 +80,6 @@ private:
Block header;
MarkCache * mark_cache;
UncompressedCache * uncompressed_cache;
MergeTreeReaderSettings reader_settings;
ReadBufferFromFileBase::ProfileCallback profile_callback;
size_t index_granularity_bytes;
size_t fixed_index_granularity;
@ -87,6 +88,8 @@ private:
const Names column_names;
const Names virtual_column_names;
PrewhereInfoPtr prewhere_info;
const ExpressionActionsSettings actions_settings;
const MergeTreeReaderSettings reader_settings;
RangesInDataParts parts_ranges;
[[ maybe_unused ]] const bool is_remote_read;

View File

@ -3,12 +3,14 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Common/TargetSpecific.h>
#include <Core/UUID.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <base/range.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypeNothing.h>
#include <bit>
#include <boost/algorithm/string/replace.hpp>
#ifdef __SSE2__
#include <emmintrin.h>
@ -920,6 +922,39 @@ bool MergeTreeRangeReader::isCurrentRangeFinished() const
return prev_reader ? prev_reader->isCurrentRangeFinished() : stream.isFinished();
}
/// When executing ExpressionActions on an empty block, it is not possible to determine the number of rows
/// in the block for the new columns, so the result block would have 0 rows and would not match the rest of
/// the columns in the ReadResult.
/// The dummy column is added to preserve the information about the number of rows in the block and to produce
/// the result block with the correct number of rows.
String addDummyColumnWithRowCount(Block & block, size_t num_rows)
{
bool has_columns = false;
for (const auto & column : block)
{
if (column.column)
{
assert(column.column->size() == num_rows);
has_columns = true;
break;
}
}
if (has_columns)
return {};
ColumnWithTypeAndName dummy_column;
dummy_column.column = DataTypeUInt8().createColumnConst(num_rows, Field(1));
dummy_column.type = std::make_shared<DataTypeUInt8>();
/// Generate a random name to avoid collisions with real columns.
dummy_column.name = "....dummy...." + toString(UUIDHelpers::generateV4());
block.insert(dummy_column);
return dummy_column.name;
}
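/// Illustrative usage sketch (not part of this diff; it mirrors the pattern applied in
/// executePrewhereActionsAndFilterColumns() further below):
///
///     const String dummy_column = addDummyColumnWithRowCount(block, result.num_rows);
///     prewhere_info->actions->execute(block);  /// the block keeps result.num_rows rows even if it had no real columns
///     if (!dummy_column.empty())
///         block.erase(dummy_column);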
MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, MarkRanges & ranges)
{
if (max_rows == 0)
@ -987,6 +1022,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar
for (const auto & col : read_result.additional_columns)
additional_columns.insert(col);
addDummyColumnWithRowCount(additional_columns, read_result.num_rows);
merge_tree_reader->evaluateMissingDefaults(additional_columns, columns);
}
@ -1308,8 +1344,17 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r
Block additional_columns = block;
if (prewhere_info->actions)
{
const String dummy_column = addDummyColumnWithRowCount(block, result.num_rows);
LOG_TEST(log, "Executing prewhere actions on block: {}", block.dumpStructure());
prewhere_info->actions->execute(block);
if (!dummy_column.empty())
block.erase(dummy_column);
}
result.additional_columns.clear();
/// Additional columns might only be needed if there are more steps in the chain.
if (!last_reader_in_chain)
@ -1370,13 +1415,16 @@ std::string PrewhereExprInfo::dump() const
{
WriteBufferFromOwnString s;
const char indent[] = "\n ";
for (size_t i = 0; i < steps.size(); ++i)
{
s << "STEP " << i << ":\n"
<< " ACTIONS: " << (steps[i].actions ? steps[i].actions->dumpActions() : "nullptr") << "\n"
<< " ACTIONS: " << (steps[i].actions ?
(indent + boost::replace_all_copy(steps[i].actions->dumpActions(), "\n", indent)) :
"nullptr") << "\n"
<< " COLUMN: " << steps[i].column_name << "\n"
<< " REMOVE_COLUMN: " << steps[i].remove_column << "\n"
<< " NEED_FILTER: " << steps[i].need_filter << "\n";
<< " NEED_FILTER: " << steps[i].need_filter << "\n\n";
}
return s.str();

View File

@ -27,6 +27,8 @@ MergeTreeReadPool::MergeTreeReadPool(
RangesInDataParts && parts_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
ContextPtr context_,
@ -36,6 +38,8 @@ MergeTreeReadPool::MergeTreeReadPool(
, virtual_column_names(virtual_column_names_)
, min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
, prewhere_info(prewhere_info_)
, actions_settings(actions_settings_)
, reader_settings(reader_settings_)
, parts_ranges(std::move(parts_))
, predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0)
, do_not_steal_tasks(do_not_steal_tasks_)
@ -46,7 +50,8 @@ MergeTreeReadPool::MergeTreeReadPool(
const auto per_part_sum_marks = fillPerPartInfo(
parts_ranges, storage_snapshot, is_part_on_remote_disk,
do_not_steal_tasks, predict_block_size_bytes,
column_names, virtual_column_names, prewhere_info, per_part_params);
column_names, virtual_column_names, prewhere_info,
actions_settings, reader_settings, per_part_params);
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges);
}
@ -60,6 +65,8 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
const Names & column_names,
const Names & virtual_column_names,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings,
const MergeTreeReaderSettings & reader_settings,
std::vector<MergeTreeReadPool::PerPartParams> & per_part_params)
{
std::vector<size_t> per_part_sum_marks;
@ -86,7 +93,7 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
auto task_columns = getReadTaskColumns(
LoadedMergeTreeDataPartInfoForReader(part.data_part), storage_snapshot,
column_names, virtual_column_names, prewhere_info, /*with_subcolumns=*/ true);
column_names, virtual_column_names, prewhere_info, actions_settings, reader_settings, /*with_subcolumns=*/ true);
auto size_predictor = !predict_block_size_bytes ? nullptr
: IMergeTreeSelectAlgorithm::getSizePredictor(part.data_part, task_columns, sample_block);
@ -105,7 +112,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
return per_part_sum_marks;
}
MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
{
const std::lock_guard lock{mutex};

View File

@ -34,6 +34,8 @@ public:
RangesInDataParts && parts_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
ContextPtr context_,
@ -95,6 +97,8 @@ public:
const Names & column_names,
const Names & virtual_column_names,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
std::vector<MergeTreeReadPool::PerPartParams> & per_part_params);
private:
@ -108,6 +112,8 @@ private:
const Names virtual_column_names;
size_t min_marks_for_concurrent_read{0};
PrewhereInfoPtr prewhere_info;
ExpressionActionsSettings actions_settings;
MergeTreeReaderSettings reader_settings;
RangesInDataParts parts_ranges;
bool predict_block_size_bytes;
bool do_not_steal_tasks;
@ -165,12 +171,16 @@ public:
ParallelReadingExtension extension_,
const RangesInDataParts & parts_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
size_t min_marks_for_concurrent_read_)
: extension(extension_)
, threads(threads_)
, prewhere_info(prewhere_info_)
, actions_settings(actions_settings_)
, reader_settings(reader_settings_)
, storage_snapshot(storage_snapshot_)
, min_marks_for_concurrent_read(min_marks_for_concurrent_read_)
, column_names(column_names_)
@ -179,7 +189,8 @@ public:
{
MergeTreeReadPool::fillPerPartInfo(
parts_ranges, storage_snapshot, is_part_on_remote_disk, do_not_steal_tasks,
predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, per_part_params);
predict_block_size_bytes, column_names, virtual_column_names, prewhere_info,
actions_settings, reader_settings, per_part_params);
extension.all_callback({
.description = parts_ranges.getDescriptions(),
@ -206,6 +217,8 @@ private:
std::mutex mutex;
PrewhereInfoPtr prewhere_info;
ExpressionActionsSettings actions_settings;
MergeTreeReaderSettings reader_settings;
StorageSnapshotPtr storage_snapshot;
size_t min_marks_for_concurrent_read;
const Names column_names;

View File

@ -19,7 +19,7 @@ MergeTreeSelectAlgorithm::MergeTreeSelectAlgorithm(
MarkRanges mark_ranges_,
bool use_uncompressed_cache_,
const PrewhereInfoPtr & prewhere_info_,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
MergeTreeInOrderReadPoolParallelReplicasPtr pool_,
const Names & virt_column_names_,
@ -27,7 +27,7 @@ MergeTreeSelectAlgorithm::MergeTreeSelectAlgorithm(
bool has_limit_below_one_block_)
: IMergeTreeSelectAlgorithm{
storage_snapshot_->getSampleBlockForColumns(required_columns_),
storage_, storage_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
storage_, storage_snapshot_, prewhere_info_, actions_settings_, max_block_size_rows_,
preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
reader_settings_, use_uncompressed_cache_, virt_column_names_},
required_columns{std::move(required_columns_)},
@ -46,7 +46,7 @@ void MergeTreeSelectAlgorithm::initializeReaders()
{
task_columns = getReadTaskColumns(
LoadedMergeTreeDataPartInfoForReader(data_part), storage_snapshot,
required_columns, virt_column_names, prewhere_info, /*with_subcolumns=*/ true);
required_columns, virt_column_names, prewhere_info, actions_settings, reader_settings, /*with_subcolumns=*/ true);
/// Will be used to distinguish between PREWHERE and WHERE columns when applying filter
const auto & column_names = task_columns.columns.getNames();

View File

@ -28,7 +28,7 @@ public:
MarkRanges mark_ranges,
bool use_uncompressed_cache,
const PrewhereInfoPtr & prewhere_info,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings,
MergeTreeInOrderReadPoolParallelReplicasPtr pool_,
const Names & virt_column_names = {},

View File

@ -162,7 +162,6 @@ struct Settings;
M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \
M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independent info.", 0) \
M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
\
/** Compress marks and primary key. */ \
M(Bool, compress_marks, false, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
M(Bool, compress_primary_key, false, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \

View File

@ -3,6 +3,7 @@
#include <Storages/StorageMergeTree.h>
#include <Interpreters/PartLog.h>
#include <DataTypes/ObjectUtils.h>
#include <Common/ProfileEventsScope.h>
namespace ProfileEvents
{
@ -47,6 +48,7 @@ struct MergeTreeSink::DelayedChunk
MergeTreeDataWriter::TemporaryPart temp_part;
UInt64 elapsed_ns;
String block_dedup_token;
ProfileEvents::Counters part_counters;
};
std::vector<Partition> partitions;
@ -70,12 +72,18 @@ void MergeTreeSink::consume(Chunk chunk)
for (auto & current_block : part_blocks)
{
Stopwatch watch;
String block_dedup_token;
ProfileEvents::Counters part_counters;
auto temp_part = storage.writer.writeTempPart(current_block, metadata_snapshot, context);
UInt64 elapsed_ns = 0;
MergeTreeDataWriter::TemporaryPart temp_part;
UInt64 elapsed_ns = watch.elapsed();
{
ProfileEventsScope scoped_attach(&part_counters);
Stopwatch watch;
temp_part = storage.writer.writeTempPart(current_block, metadata_snapshot, context);
elapsed_ns = watch.elapsed();
}
/// If optimize_on_insert setting is true, current_block could become empty after merge
/// and we didn't create a part.
@ -85,6 +93,7 @@ void MergeTreeSink::consume(Chunk chunk)
if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite())
support_parallel_write = true;
String block_dedup_token;
if (storage.getDeduplicationLog())
{
const String & dedup_token = settings.insert_deduplication_token;
@ -119,7 +128,8 @@ void MergeTreeSink::consume(Chunk chunk)
{
.temp_part = std::move(temp_part),
.elapsed_ns = elapsed_ns,
.block_dedup_token = std::move(block_dedup_token)
.block_dedup_token = std::move(block_dedup_token),
.part_counters = std::move(part_counters),
});
}
@ -135,6 +145,8 @@ void MergeTreeSink::finishDelayedChunk()
for (auto & partition : delayed_chunk->partitions)
{
ProfileEventsScope scoped_attach(&partition.part_counters);
partition.temp_part.finalize();
auto & part = partition.temp_part.part;
@ -168,7 +180,8 @@ void MergeTreeSink::finishDelayedChunk()
/// Part can be deduplicated, so increment counters and add to part log only if it's really added
if (added)
{
PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns);
auto counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(partition.part_counters.getPartiallyAtomicSnapshot());
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot));
storage.incrementInsertedPartsProfileEvent(part->getType());
/// Initiate async merge - it will be done if it's a good time for merge and if there is space in 'background_pool'.

View File

@ -0,0 +1,347 @@
#include <Functions/CastOverloadResolver.h>
#include <Functions/FunctionsLogical.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MergeTreeRangeReader.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/// Stores the list of columns required to compute a node in the DAG.
struct NodeInfo
{
NameSet required_columns;
};
/// Fills the list of required columns for a node in the DAG.
void fillRequiredColumns(const ActionsDAG::Node * node, std::unordered_map<const ActionsDAG::Node *, NodeInfo> & nodes_info)
{
if (nodes_info.contains(node))
return;
auto & node_info = nodes_info[node];
if (node->type == ActionsDAG::ActionType::INPUT)
{
node_info.required_columns.insert(node->result_name);
return;
}
for (const auto & child : node->children)
{
fillRequiredColumns(child, nodes_info);
const auto & child_info = nodes_info[child];
node_info.required_columns.insert(child_info.required_columns.begin(), child_info.required_columns.end());
}
}
/// Stores information about a node that has already been cloned or added to one of the new DAGs.
/// This makes it possible to avoid cloning the same sub-DAG into multiple step DAGs and to reference nodes already cloned in earlier steps instead.
struct DAGNodeRef
{
ActionsDAGPtr dag;
const ActionsDAG::Node * node;
};
/// Result name -> DAGNodeRef
using OriginalToNewNodeMap = std::unordered_map<String, DAGNodeRef>;
/// Clones the part of the original DAG that is responsible for computing original_dag_node and adds it to the new DAG.
const ActionsDAG::Node & addClonedDAGToDAG(const ActionsDAG::Node * original_dag_node, ActionsDAGPtr new_dag, OriginalToNewNodeMap & node_remap)
{
const String & node_name = original_dag_node->result_name;
/// Look for the node in the map of already known nodes
if (node_remap.contains(node_name))
{
/// If the node is already in the new DAG, return it
const auto & node_ref = node_remap.at(node_name);
if (node_ref.dag == new_dag)
return *node_ref.node;
/// If the node is known from the previous steps, add it as an input, except for constants
if (original_dag_node->type != ActionsDAG::ActionType::COLUMN)
{
node_ref.dag->addOrReplaceInOutputs(*node_ref.node);
const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type);
node_remap[node_name] = {new_dag, &new_node}; /// TODO: here we update the node reference. Is it always correct?
return new_node;
}
}
/// If the node is an input, add it as an input
if (original_dag_node->type == ActionsDAG::ActionType::INPUT)
{
const auto & new_node = new_dag->addInput(original_dag_node->result_name, original_dag_node->result_type);
node_remap[node_name] = {new_dag, &new_node};
return new_node;
}
/// If the node is a column, add it as an input
if (original_dag_node->type == ActionsDAG::ActionType::COLUMN)
{
const auto & new_node = new_dag->addColumn(
ColumnWithTypeAndName(original_dag_node->column, original_dag_node->result_type, original_dag_node->result_name));
node_remap[node_name] = {new_dag, &new_node};
return new_node;
}
/// TODO: Do we need to handle ALIAS nodes in cloning?
/// If the node is a function, add it as a function and add its children
if (original_dag_node->type == ActionsDAG::ActionType::FUNCTION)
{
ActionsDAG::NodeRawConstPtrs new_children;
for (const auto & child : original_dag_node->children)
{
const auto & new_child = addClonedDAGToDAG(child, new_dag, node_remap);
new_children.push_back(&new_child);
}
const auto & new_node = new_dag->addFunction(original_dag_node->function_base, new_children, original_dag_node->result_name);
node_remap[node_name] = {new_dag, &new_node};
return new_node;
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected node type in PREWHERE actions: {}", original_dag_node->type);
}
const ActionsDAG::Node & addFunction(
ActionsDAGPtr new_dag,
const FunctionOverloadResolverPtr & function,
ActionsDAG::NodeRawConstPtrs children,
OriginalToNewNodeMap & node_remap)
{
const auto & new_node = new_dag->addFunction(function, children, "");
node_remap[new_node.result_name] = {new_dag, &new_node};
return new_node;
}
/// Adds a CAST node with the regular name ("CAST(...)") or with the provided name.
/// This is different from ActionsDAG::addCast(), which sets the result name equal to the original name, effectively hiding the uncasted value,
/// even though that value might still be required by further steps under its original type.
const ActionsDAG::Node & addCast(
ActionsDAGPtr dag,
const ActionsDAG::Node & node_to_cast,
const String & type_name,
OriginalToNewNodeMap & node_remap)
{
if (node_to_cast.result_type->getName() == type_name)
return node_to_cast;
Field cast_type_constant_value(type_name);
ColumnWithTypeAndName column;
column.column = DataTypeString().createColumnConst(0, cast_type_constant_value);
column.type = std::make_shared<DataTypeString>();
const auto * cast_type_constant_node = &dag->addColumn(std::move(column));
ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node};
FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver<CastType::nonAccurate>::createImpl();
return addFunction(dag, func_builder_cast, std::move(children), node_remap);
}
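/// Illustrative usage sketch (not part of this diff; it mirrors the call made for the final PREWHERE column below,
/// where last_step_dag, and_node and output are the names used there):
///
///     const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap);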
/// Normalizes the filter node by adding AND with a constant true.
/// This:
/// 1. produces a result with the proper Nullable or non-Nullable UInt8 type and
/// 2. makes sure that the result contains only 0 or 1 values even if the source column contains non-boolean values.
const ActionsDAG::Node & addAndTrue(
ActionsDAGPtr dag,
const ActionsDAG::Node & filter_node_to_normalize,
OriginalToNewNodeMap & node_remap)
{
Field const_true_value(true);
ColumnWithTypeAndName const_true_column;
const_true_column.column = DataTypeUInt8().createColumnConst(0, const_true_value);
const_true_column.type = std::make_shared<DataTypeUInt8>();
const auto * const_true_node = &dag->addColumn(std::move(const_true_column));
ActionsDAG::NodeRawConstPtrs children = {&filter_node_to_normalize, const_true_node};
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
return addFunction(dag, func_builder_and, children, node_remap);
}
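/// Illustrative usage sketch (not part of this diff; step_dag and result_node are the names used in the
/// single-condition branch further below):
///
///     const auto & normalized = addAndTrue(step_dag, result_node, node_remap);
///     step_dag->addOrReplaceInOutputs(normalized);  /// the filter now has (Nullable-)UInt8 type and only 0/1 values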
}
/// We want to build a sequence of steps that will compute parts of the prewhere condition.
/// Each step reads some new columns and computes some new expressions and a filter condition.
/// The last step computes the final filter condition and the remaining expressions that are required for the main query.
/// The goal is to filter out as many rows as possible in the early steps so that the remaining steps
/// read less data from the storage.
/// NOTE: The result of executing the steps is exactly the same as if the original DAG were executed in a single step.
///
/// The steps are built in the following way:
/// 1. List all condition nodes that are combined with AND into PREWHERE condition
/// 2. Collect the set of columns that are used in each condition
/// 3. Sort condition nodes by the number of columns used in them and the overall size of those columns
/// 4. Group conditions with the same set of columns into a single read/compute step
/// 5. Build DAGs for each step:
/// - DFS from the condition root node:
/// - If the node was not computed yet, add it to the DAG and traverse its children
/// - If the node was already computed by one of the previous steps, add it as output for that step and as input for the current step
/// - If the node was already computed by the current step just stop traversing
/// 6. Find all outputs of the original DAG
/// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed
/// 8. Add computation of the remaining outputs to the last step, using the same cloning procedure as in step 5
bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere)
{
if (!prewhere_info || !prewhere_info->prewhere_actions)
return true;
Poco::Logger * log = &Poco::Logger::get("tryBuildPrewhereSteps");
LOG_TRACE(log, "Original PREWHERE DAG:\n{}", prewhere_info->prewhere_actions->dumpDAG());
/// 1. List all condition nodes that are combined with AND into PREWHERE condition
const auto & condition_root = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name);
const bool is_conjunction = (condition_root.type == ActionsDAG::ActionType::FUNCTION && condition_root.function_base->getName() == "and");
if (!is_conjunction)
return false;
auto condition_nodes = condition_root.children;
/// 2. Collect the set of columns that are used in the condition
std::unordered_map<const ActionsDAG::Node *, NodeInfo> nodes_info;
for (const auto & node : condition_nodes)
{
fillRequiredColumns(node, nodes_info);
}
/// 3. Sort condition nodes by the number of columns used in them and the overall size of those columns
/// TODO: not sorting for now because the conditions are already sorted by Where Optimizer
/// 4. Group conditions with the same set of columns into a single read/compute step
std::vector<std::vector<const ActionsDAG::Node *>> condition_groups;
for (const auto & node : condition_nodes)
{
const auto & node_info = nodes_info[node];
if (!condition_groups.empty() && nodes_info[condition_groups.back().back()].required_columns == node_info.required_columns)
condition_groups.back().push_back(node); /// Add to the last group
else
condition_groups.push_back({node}); /// Start new group
}
/// 5. Build DAGs for each step
struct Step
{
ActionsDAGPtr actions;
String column_name;
};
std::vector<Step> steps;
OriginalToNewNodeMap node_remap;
for (const auto & condition_group : condition_groups)
{
ActionsDAGPtr step_dag = std::make_shared<ActionsDAG>();
String result_name;
std::vector<const ActionsDAG::Node *> new_condition_nodes;
for (const auto * node : condition_group)
{
const auto & node_in_new_dag = addClonedDAGToDAG(node, step_dag, node_remap);
new_condition_nodes.push_back(&node_in_new_dag);
}
if (new_condition_nodes.size() > 1)
{
/// Add AND function to combine the conditions
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
const auto & and_function_node = addFunction(step_dag, func_builder_and, new_condition_nodes, node_remap);
step_dag->addOrReplaceInOutputs(and_function_node);
result_name = and_function_node.result_name;
}
else
{
const auto & result_node = *new_condition_nodes.front();
/// Check if explicit cast is needed for the condition to serve as a filter.
const auto result_type_name = result_node.result_type->getName();
if (result_type_name == "UInt8" ||
result_type_name == "Nullable(UInt8)" ||
result_type_name == "LowCardinality(UInt8)" ||
result_type_name == "LowCardinality(Nullable(UInt8))")
{
/// No need to cast
step_dag->addOrReplaceInOutputs(result_node);
result_name = result_node.result_name;
}
else
{
/// Build "condition AND True" expression to "cast" the condition to UInt8 or Nullable(UInt8) depending on its type.
const auto & cast_node = addAndTrue(step_dag, result_node, node_remap);
step_dag->addOrReplaceInOutputs(cast_node);
result_name = cast_node.result_name;
}
}
steps.push_back({step_dag, result_name});
}
/// 6. Find all outputs of the original DAG
auto original_outputs = prewhere_info->prewhere_actions->getOutputs();
/// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed
/// 8. Add computation of the remaining outputs to the last step with the same cloning procedure as in step 5
NameSet all_output_names;
for (const auto * output : original_outputs)
{
all_output_names.insert(output->result_name);
if (node_remap.contains(output->result_name))
{
const auto & new_node_info = node_remap[output->result_name];
new_node_info.dag->addOrReplaceInOutputs(*new_node_info.node);
}
else if (output->result_name == prewhere_info->prewhere_column_name)
{
/// Special case for the final PREWHERE column: it is an AND combination of all conditions,
/// but here we only have the condition for the last step. We know that the combined filter is equivalent
/// to the last condition once the filters from the previous steps have been applied. We just need to CAST the last
/// condition to the type of the combined filter. We do this in 2 steps:
/// 1. AND the last condition with constant True. This makes sure that the last step's filter has UInt8 type and
/// contains only 0 and 1, even if the original condition (e.g. (number % 5)) produces other values like 2, 3 or 4
/// 2. CAST the result to the exact type of the PREWHERE column from the original DAG
const auto & last_step_result_node_info = node_remap[steps.back().column_name];
auto & last_step_dag = steps.back().actions;
/// Build AND(last_step_result_node, true)
const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap);
/// Build CAST(and_node, type of PREWHERE column)
const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap);
/// Add alias for the result with the name of the PREWHERE column
const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name);
last_step_dag->addOrReplaceInOutputs(prewhere_result_node);
}
else
{
const auto & node_in_new_dag = addClonedDAGToDAG(output, steps.back().actions, node_remap);
steps.back().actions->addOrReplaceInOutputs(node_in_new_dag);
}
}
/// 9. Build PrewhereExprInfo
{
for (const auto & step : steps)
{
prewhere.steps.push_back(
{
.actions = std::make_shared<ExpressionActions>(step.actions, actions_settings),
.column_name = step.column_name,
.remove_column = !all_output_names.contains(step.column_name), /// Don't remove if it's in the list of original outputs
.need_filter = false,
});
}
prewhere.steps.back().need_filter = prewhere_info->need_filter;
}
LOG_TRACE(log, "Resulting PREWHERE:\n{}", prewhere.dump());
return true;
}
}
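/// Illustrative sketch (not part of this file): the builder above is expected to be invoked roughly like this,
/// with prewhere_info and actions_settings taken from the read path that uses the resulting steps:
///
///     PrewhereExprInfo split_prewhere;
///     if (tryBuildPrewhereSteps(prewhere_info, actions_settings, split_prewhere))
///         LOG_TRACE(log, "PREWHERE split into {} step(s):\n{}", split_prewhere.steps.size(), split_prewhere.dump());
///
/// For a condition such as "a = 1 AND b < 10" where the two conjuncts use different columns, this yields two steps:
/// the first reads only column "a" and filters on it, the second reads column "b" only for the surviving rows and
/// applies the final, type-normalized filter, so the overall result matches executing the original DAG in one step.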

View File

@ -18,11 +18,11 @@ MergeTreeThreadSelectAlgorithm::MergeTreeThreadSelectAlgorithm(
const StorageSnapshotPtr & storage_snapshot_,
bool use_uncompressed_cache_,
const PrewhereInfoPtr & prewhere_info_,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & virt_column_names_)
: IMergeTreeSelectAlgorithm{
pool_->getHeader(), storage_, storage_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
pool_->getHeader(), storage_, storage_snapshot_, prewhere_info_, actions_settings_, max_block_size_rows_,
preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
reader_settings_, use_uncompressed_cache_, virt_column_names_},
thread{thread_},

View File

@ -25,7 +25,7 @@ public:
const StorageSnapshotPtr & storage_snapshot_,
bool use_uncompressed_cache_,
const PrewhereInfoPtr & prewhere_info_,
ExpressionActionsSettings actions_settings,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & virt_column_names_);

View File

@ -41,6 +41,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
, block_with_constants{KeyCondition::getBlockWithConstants(query_info.query->clone(), query_info.syntax_analyzer_result, context)}
, log{log_}
, column_sizes{std::move(column_sizes_)}
, move_all_conditions_to_prewhere(context->getSettingsRef().move_all_conditions_to_prewhere)
{
for (const auto & name : queried_columns)
{
@ -272,23 +273,26 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const
if (!it->viable)
break;
bool moved_enough = false;
if (total_size_of_queried_columns > 0)
if (!move_all_conditions_to_prewhere)
{
/// If we know size of queried columns use it as threshold. 10% ratio is just a guess.
moved_enough = total_size_of_moved_conditions > 0
&& (total_size_of_moved_conditions + it->columns_size) * 10 > total_size_of_queried_columns;
}
else
{
/// Otherwise, use number of moved columns as a fallback.
/// It can happen, if table has only compact parts. 25% ratio is just a guess.
moved_enough = total_number_of_moved_columns > 0
&& (total_number_of_moved_columns + it->identifiers.size()) * 4 > queried_columns.size();
}
bool moved_enough = false;
if (total_size_of_queried_columns > 0)
{
/// If we know size of queried columns use it as threshold. 10% ratio is just a guess.
moved_enough = total_size_of_moved_conditions > 0
&& (total_size_of_moved_conditions + it->columns_size) * 10 > total_size_of_queried_columns;
}
else
{
/// Otherwise, use number of moved columns as a fallback.
/// It can happen, if table has only compact parts. 25% ratio is just a guess.
moved_enough = total_number_of_moved_columns > 0
&& (total_number_of_moved_columns + it->identifiers.size()) * 4 > queried_columns.size();
}
if (moved_enough)
break;
if (moved_enough)
break;
}
move_condition(it);
}

View File

@ -111,6 +111,7 @@ private:
std::unordered_map<std::string, UInt64> column_sizes;
UInt64 total_size_of_queried_columns = 0;
NameSet array_joined_names;
const bool move_all_conditions_to_prewhere = false;
};

View File

@ -184,9 +184,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare()
return {true, true, [this] (const ExecutionStatus & execution_status)
{
auto profile_counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(profile_counters.getPartiallyAtomicSnapshot());
storage.writePartLog(
PartLogElement::MUTATE_PART, execution_status, stopwatch_ptr->elapsed(),
entry.new_part_name, new_part, future_mutated_part->parts, merge_mutate_entry.get());
entry.new_part_name, new_part, future_mutated_part->parts, merge_mutate_entry.get(), std::move(profile_counters_snapshot));
}};
}

View File

@ -2,6 +2,7 @@
#include <Storages/StorageMergeTree.h>
#include <Interpreters/TransactionLog.h>
#include <Common/ProfileEventsScope.h>
namespace DB
{
@ -38,6 +39,7 @@ void MutatePlainMergeTreeTask::prepare()
write_part_log = [this] (const ExecutionStatus & execution_status)
{
auto profile_counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(profile_counters.getPartiallyAtomicSnapshot());
mutate_task.reset();
storage.writePartLog(
PartLogElement::MUTATE_PART,
@ -46,7 +48,8 @@ void MutatePlainMergeTreeTask::prepare()
future_part->name,
new_part,
future_part->parts,
merge_list_entry.get());
merge_list_entry.get(),
std::move(profile_counters_snapshot));
};
fake_query_context = Context::createCopy(storage.getContext());
@ -58,8 +61,12 @@ void MutatePlainMergeTreeTask::prepare()
time(nullptr), fake_query_context, merge_mutate_entry->txn, merge_mutate_entry->tagger->reserved_space, table_lock_holder);
}
bool MutatePlainMergeTreeTask::executeStep()
{
/// Metrics will be saved in the local profile_counters.
ProfileEventsScope profile_events_scope(&profile_counters);
/// Make our memory tracker a parent of the current thread's memory tracker
MemoryTrackerThreadSwitcherPtr switcher;
if (merge_list_entry)
@ -123,5 +130,4 @@ bool MutatePlainMergeTreeTask::executeStep()
return false;
}
}

View File

@ -9,6 +9,7 @@
#include <Storages/MutationCommands.h>
#include <Storages/MergeTree/MergeMutateSelectedEntry.h>
namespace DB
{
@ -76,6 +77,8 @@ private:
ContextMutablePtr fake_query_context;
MutateTaskPtr mutate_task;
ProfileEvents::Counters profile_counters;
};

View File

@ -20,6 +20,7 @@
#include <Storages/MutationCommands.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <boost/algorithm/string/replace.hpp>
#include <Common/ProfileEventsScope.h>
namespace CurrentMetrics
@ -907,6 +908,7 @@ public:
/// Need execute again
return true;
}
private:
String name;
MergeTreeData::MutableDataPartsVector parts;
@ -1262,6 +1264,7 @@ private:
std::unique_ptr<PartMergerWriter> part_merger_writer_task;
};
class MutateSomePartColumnsTask : public IExecutableTask
{
public:

View File

@ -2,6 +2,7 @@
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQueue.h>
#include <Common/ProfileEventsScope.h>
namespace DB
@ -29,6 +30,9 @@ void ReplicatedMergeMutateTaskBase::onCompleted()
bool ReplicatedMergeMutateTaskBase::executeStep()
{
/// Metrics will be saved in the local profile_counters.
ProfileEventsScope profile_events_scope(&profile_counters);
std::exception_ptr saved_exception;
bool retryable_error = false;
@ -83,7 +87,6 @@ bool ReplicatedMergeMutateTaskBase::executeStep()
saved_exception = std::current_exception();
}
if (!retryable_error && saved_exception)
{
std::lock_guard lock(storage.queue.state_mutex);

View File

@ -5,6 +5,7 @@
#include <Storages/MergeTree/IExecutableTask.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQueue.h>
namespace DB
{
@ -59,9 +60,10 @@ protected:
MergeList::EntryPtr merge_mutate_entry{nullptr};
Poco::Logger * log;
StorageReplicatedMergeTree & storage;
/// ProfileEvents for the current part will be stored here
ProfileEvents::Counters profile_counters;
private:
enum class CheckExistingPartResult
{
PART_EXISTS,
@ -69,7 +71,7 @@ private:
};
CheckExistingPartResult checkExistingPart();
bool executeImpl() ;
bool executeImpl();
enum class State
{

View File

@ -2,6 +2,7 @@
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
#include <Interpreters/PartLog.h>
#include <Common/ProfileEventsScope.h>
#include <Common/SipHash.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ThreadFuzzer.h>
@ -48,14 +49,21 @@ struct ReplicatedMergeTreeSinkImpl<async_insert>::DelayedChunk
BlockIDsType block_id;
BlockWithPartition block_with_partition;
std::unordered_map<String, std::vector<size_t>> block_id_to_offset_idx;
ProfileEvents::Counters part_counters;
Partition() = default;
Partition(Poco::Logger * log_, MergeTreeDataWriter::TemporaryPart && temp_part_, UInt64 elapsed_ns_, BlockIDsType && block_id_, BlockWithPartition && block_)
Partition(Poco::Logger * log_,
MergeTreeDataWriter::TemporaryPart && temp_part_,
UInt64 elapsed_ns_,
BlockIDsType && block_id_,
BlockWithPartition && block_,
ProfileEvents::Counters && part_counters_)
: log(log_),
temp_part(std::move(temp_part_)),
elapsed_ns(elapsed_ns_),
block_id(std::move(block_id_)),
block_with_partition(std::move(block_))
block_with_partition(std::move(block_)),
part_counters(std::move(part_counters_))
{
initBlockIDMap();
}
@ -186,8 +194,9 @@ std::vector<Int64> testSelfDeduplicate(std::vector<Int64> data, std::vector<size
Block block({ColumnWithTypeAndName(std::move(column), DataTypePtr(new DataTypeInt64()), "a")});
BlockWithPartition block1(std::move(block), Row(), std::move(offsets));
ProfileEvents::Counters profile_counters;
ReplicatedMergeTreeSinkImpl<true>::DelayedChunk::Partition part(
&Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1));
&Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::move(profile_counters));
part.filterSelfDuplicate();
@ -411,6 +420,9 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
{
Stopwatch watch;
ProfileEvents::Counters part_counters;
auto profile_events_scope = std::make_unique<ProfileEventsScope>(&part_counters);
/// Write part to the filesystem under temporary name. Calculate a checksum.
auto temp_part = storage.writer.writeTempPart(current_block, metadata_snapshot, context);
@ -452,6 +464,7 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num));
}
profile_events_scope.reset();
UInt64 elapsed_ns = watch.elapsed();
size_t max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE;
@ -472,12 +485,14 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
partitions = DelayedPartitions{};
}
partitions.emplace_back(DelayedPartition(
log,
std::move(temp_part),
elapsed_ns,
std::move(block_id),
std::move(current_block)
std::move(current_block),
std::move(part_counters) /// profile_events_scope must already be reset by this point.
));
}
@ -503,6 +518,8 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
for (auto & partition : delayed_chunk->partitions)
{
ProfileEventsScope scoped_attach(&partition.part_counters);
partition.temp_part.finalize();
auto & part = partition.temp_part.part;
@ -515,12 +532,14 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
/// Set a special error code if the block is a duplicate
int error = (deduplicate && part->is_duplicate) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0;
PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns, ExecutionStatus(error));
auto counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(partition.part_counters.getPartiallyAtomicSnapshot());
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error));
storage.incrementInsertedPartsProfileEvent(part->getType());
}
catch (...)
{
PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns, ExecutionStatus::fromCurrentException("", true));
auto counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(partition.part_counters.getPartiallyAtomicSnapshot());
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus::fromCurrentException("", true));
throw;
}
}
@ -579,16 +598,17 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::writeExistingPart(MergeTreeData:
size_t replicas_num = checkQuorumPrecondition(zookeeper);
Stopwatch watch;
ProfileEventsScope profile_events_scope;
try
{
part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
commitPart(zookeeper, part, BlockIDsType(), replicas_num, true);
PartLog::addNewPart(storage.getContext(), part, watch.elapsed());
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewPart(storage.getContext(), part, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus::fromCurrentException("", true));
throw;
}
}

View File

@ -150,8 +150,7 @@ TEST(Executor, RemoveTasks)
for (size_t i = 0; i < batch; ++i)
for (size_t j = 0; j < tasks_kinds; ++j)
ASSERT_TRUE(
executor->trySchedule(std::make_shared<FakeExecutableTask>(std::to_string(j)))
);
executor->trySchedule(std::make_shared<FakeExecutableTask>(std::to_string(j))));
std::vector<std::thread> threads(batch);

View File

@ -56,10 +56,10 @@ int RabbitMQHandler::iterateLoop()
/// No need for the synchronization done in iterateLoop(), because this method is used only for
/// initial RabbitMQ setup - at this point there is no background loop thread.
void RabbitMQHandler::startBlockingLoop()
int RabbitMQHandler::startBlockingLoop()
{
LOG_DEBUG(log, "Started blocking loop.");
uv_run(loop, UV_RUN_DEFAULT);
return uv_run(loop, UV_RUN_DEFAULT);
}
void RabbitMQHandler::stopLoop()

View File

@ -38,7 +38,7 @@ public:
/// Loop to wait for small tasks in a blocking mode.
/// No synchronization is done with the main loop thread.
void startBlockingLoop();
int startBlockingLoop();
void stopLoop();

View File

@ -262,7 +262,20 @@ void RabbitMQProducer::startProducingTaskLoop()
LOG_TEST(log, "Waiting for pending callbacks to finish (count: {}, try: {})", res, try_num);
}
LOG_DEBUG(log, "Producer on channel {} completed", channel_id);
producer_channel->close()
.onSuccess([&]()
{
LOG_TRACE(log, "Successfully closed producer channel");
connection.getHandler().stopLoop();
})
.onError([&](const char * message)
{
LOG_ERROR(log, "Failed to close producer channel: {}", message);
connection.getHandler().stopLoop();
});
int active = connection.getHandler().startBlockingLoop();
LOG_DEBUG(log, "Producer on channel completed (not finished events: {})", active);
}

View File

@ -8,6 +8,7 @@
#include <Backups/BackupEntriesCollector.h>
#include <Databases/IDatabase.h>
#include <Common/escapeForFileName.h>
#include <Common/ProfileEventsScope.h>
#include <Common/typeid_cast.h>
#include <Common/ThreadPool.h>
#include <Interpreters/InterpreterAlterQuery.h>
@ -1619,6 +1620,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
waitForOutdatedPartsToBeLoaded();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
auto txn = query_context->getCurrentTransaction();
MergeTreeData::Transaction transaction(*this, txn.get());
@ -1639,7 +1641,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
LOG_INFO(log, "Truncated table with {} parts by replacing them with new empty {} parts. With txn {}",
parts.size(), future_parts.size(),
@ -1661,6 +1663,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
auto merge_blocker = stopMergesAndWait();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
@ -1692,7 +1695,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} {} part by replacing it with new empty {} part. With txn {}",
@ -1718,6 +1721,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
auto merge_blocker = stopMergesAndWait();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
/// It's important to create it outside of lock scope because
/// otherwise it can lock parts in destructor and deadlock is possible.
@ -1757,7 +1761,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
renameAndCommitEmptyParts(new_data_parts, transaction);
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
const auto * op = detach ? "Detached" : "Dropped";
LOG_INFO(log, "{} partition with {} parts by replacing them with new empty {} parts. With txn {}",
@ -1825,6 +1829,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
auto my_metadata_snapshot = getInMemoryMetadataPtr();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot);
String partition_id = getPartitionIDFromQuery(partition, local_context);
@ -1889,11 +1895,12 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
removePartsInRangeFromWorkingSet(local_context->getCurrentTransaction().get(), drop_range, data_parts_lock);
}
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
/// Note: the same elapsed time and profile events are used for all parts
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true));
throw;
}
}
@ -1920,6 +1927,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const
auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr();
auto metadata_snapshot = getInMemoryMetadataPtr();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this, metadata_snapshot, dest_metadata_snapshot);
String partition_id = getPartitionIDFromQuery(partition, local_context);
@ -1972,11 +1980,12 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const
clearOldPartsFromFilesystem();
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
/// Note: the same elapsed time and profile events are used for all parts
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true));
throw;
}
}

View File

@ -2,8 +2,10 @@
#include <cstddef>
#include <ranges>
#include "Common/hex.h"
#include <Common/hex.h>
#include <Common/Macros.h>
#include <Common/ProfileEventsScope.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
@ -1592,6 +1594,8 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry)
if (entry.type == LogEntry::ATTACH_PART)
{
ProfileEventsScope profile_events_scope;
if (MutableDataPartPtr part = attachPartHelperFoundValidPart(entry))
{
LOG_TRACE(log, "Found valid local part for {}, preparing the transaction", part->name);
@ -1603,7 +1607,8 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry)
checkPartChecksumsAndCommit(transaction, part);
writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */,
part->name, part, {} /** log entry is fake so there are no initial parts */, nullptr);
part->name, part, {} /** log entry is fake so there are no initial parts */, nullptr,
profile_events_scope.getSnapshot());
return true;
}
@ -1947,6 +1952,8 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
{
Stopwatch watch;
ProfileEventsScope profile_events_scope;
auto & entry_replace = *entry.replace_range_entry;
LOG_DEBUG(log, "Executing log entry {} to replace parts range {} with {} parts from {}.{}",
entry.znode_name, entry_replace.drop_range_part_name, entry_replace.new_part_names.size(),
@ -2339,11 +2346,11 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
}
}
PartLog::addNewParts(getContext(), res_parts, watch.elapsed());
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(res_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewParts(getContext(), res_parts, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(res_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true));
for (const auto & res_part : res_parts)
unlockSharedData(*res_part);
@ -4008,12 +4015,14 @@ bool StorageReplicatedMergeTree::fetchPart(
Stopwatch stopwatch;
MutableDataPartPtr part;
DataPartsVector replaced_parts;
ProfileEventsScope profile_events_scope;
auto write_part_log = [&] (const ExecutionStatus & execution_status)
{
writePartLog(
PartLogElement::DOWNLOAD_PART, execution_status, stopwatch.elapsed(),
part_name, part, replaced_parts, nullptr);
part_name, part, replaced_parts, nullptr,
profile_events_scope.getSnapshot());
};
DataPartPtr part_to_clone;
@ -4244,12 +4253,14 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart(
Stopwatch stopwatch;
MutableDataPartPtr part;
DataPartsVector replaced_parts;
ProfileEventsScope profile_events_scope;
auto write_part_log = [&] (const ExecutionStatus & execution_status)
{
writePartLog(
PartLogElement::DOWNLOAD_PART, execution_status, stopwatch.elapsed(),
part_name, part, replaced_parts, nullptr);
part_name, part, replaced_parts, nullptr,
profile_events_scope.getSnapshot());
};
std::function<MutableDataPartPtr()> get_part;
@ -6894,6 +6905,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
auto metadata_snapshot = getInMemoryMetadataPtr();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot);
String partition_id = getPartitionIDFromQuery(partition, query_context);
@ -7070,11 +7083,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
}
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true));
for (const auto & dst_part : dst_parts)
unlockSharedData(*dst_part);
@ -7129,6 +7142,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
auto metadata_snapshot = getInMemoryMetadataPtr();
Stopwatch watch;
ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this, metadata_snapshot, dest_metadata_snapshot);
auto src_data_id = src_data.getStorageID();
String partition_id = getPartitionIDFromQuery(partition, query_context);
@ -7299,11 +7314,11 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
transaction.commit(&src_data_parts_lock);
}
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
}
catch (...)
{
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException("", true));
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true));
for (const auto & dst_part : dst_parts)
dest_table_storage->unlockSharedData(*dst_part);

View File

@ -58,6 +58,7 @@ def test_simple_select(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS simple_meili_table")
node.query(
"CREATE TABLE simple_meili_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili1:7700', 'new_table', '')"
)
@ -83,6 +84,7 @@ def test_insert(started_cluster):
big_table = client.index("big_table")
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS new_table")
node.query(
"CREATE TABLE new_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili1:7700', 'new_table', '')"
)
@ -90,9 +92,10 @@ def test_insert(started_cluster):
node.query(
"INSERT INTO new_table (id, data) VALUES (1, '1') (2, '2') (3, '3') (4, '4') (5, '5') (6, '6') (7, '7')"
)
sleep(1)
sleep(5)
assert len(new_table.get_documents()) == 7
node.query("DROP TABLE IF EXISTS big_table")
node.query(
"CREATE TABLE big_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili1:7700', 'big_table', '')"
)
@ -124,6 +127,7 @@ def test_meilimatch(started_cluster):
push_movies(client)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS movies_table")
node.query(
"CREATE TABLE movies_table(id String, title String, release_date Int64) ENGINE = MeiliSearch('http://meili1:7700', 'movies', '')"
)
@ -208,6 +212,7 @@ def test_incorrect_data_type(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS strange_meili_table")
node.query(
"CREATE TABLE strange_meili_table(id UInt64, data String, bbbb String) ENGINE = MeiliSearch('http://meili1:7700', 'new_table', '')"
)
@ -230,10 +235,12 @@ def test_simple_select_secure(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS simple_meili_table")
node.query(
"CREATE TABLE simple_meili_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', 'password')"
)
node.query("DROP TABLE IF EXISTS wrong_meili_table")
node.query(
"CREATE TABLE wrong_meili_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', 'wrong_password')"
)
@ -272,6 +279,7 @@ def test_meilimatch_secure(started_cluster):
push_movies(client)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS movies_table")
node.query(
"CREATE TABLE movies_table(id String, title String, release_date Int64) ENGINE = MeiliSearch('http://meili_secure:7700', 'movies', 'password')"
)
@ -356,6 +364,7 @@ def test_incorrect_data_type_secure(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS strange_meili_table")
node.query(
"CREATE TABLE strange_meili_table(id UInt64, data String, bbbb String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', 'password')"
)
@ -374,6 +383,7 @@ def test_insert_secure(started_cluster):
big_table = client.index("big_table")
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS new_table")
node.query(
"CREATE TABLE new_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', 'password')"
)
@ -381,9 +391,10 @@ def test_insert_secure(started_cluster):
node.query(
"INSERT INTO new_table (id, data) VALUES (1, '1') (2, '2') (3, '3') (4, '4') (5, '5') (6, '6') (7, '7')"
)
sleep(1)
sleep(5)
assert len(new_table.get_documents()) == 7
node.query("DROP TABLE IF EXISTS big_table")
node.query(
"CREATE TABLE big_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'big_table', 'password')"
)
@ -417,9 +428,11 @@ def test_security_levels(started_cluster):
values += "(" + str(i) + ", " + "'" + str(i) + "'" + ") "
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS read_table")
node.query(
f"CREATE TABLE read_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', '{search_key}')"
)
node.query("DROP TABLE IF EXISTS write_table")
node.query(
f"CREATE TABLE write_table(id UInt64, data String) ENGINE = MeiliSearch('http://meili_secure:7700', 'new_table', '{admin_key}')"
)
@ -430,7 +443,7 @@ def test_security_levels(started_cluster):
assert "MEILISEARCH_EXCEPTION" in error
node.query("INSERT INTO write_table (id, data) VALUES " + values)
sleep(1)
sleep(5)
assert len(new_table.get_documents({"limit": 40010})) == 100
ans1 = (
@ -493,6 +506,7 @@ def test_types(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS types_table")
node.query(
"CREATE TABLE types_table(\
id UInt64,\
@ -556,6 +570,7 @@ def test_named_collection(started_cluster):
push_data(client, table, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS simple_meili_table")
node.query(
"CREATE TABLE simple_meili_table(id UInt64, data String) ENGINE = MeiliSearch( named_collection_for_meili )"
)
@ -589,14 +604,17 @@ def test_named_collection_secure(started_cluster):
push_data(client_free, table_free, data)
node = started_cluster.instances["meili"]
node.query("DROP TABLE IF EXISTS simple_meili_table")
node.query(
"CREATE TABLE simple_meili_table(id UInt64, data String) ENGINE = MeiliSearch( named_collection_for_meili_secure )"
)
node.query("DROP TABLE IF EXISTS wrong_meili_table")
node.query(
"CREATE TABLE wrong_meili_table(id UInt64, data String) ENGINE = MeiliSearch( named_collection_for_meili_secure_no_password )"
)
node.query("DROP TABLE IF EXISTS combine_meili_table")
node.query(
'CREATE TABLE combine_meili_table(id UInt64, data String) ENGINE = MeiliSearch( named_collection_for_meili_secure_no_password, key="password" )'
)

View File

@ -1034,8 +1034,7 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
rabbitmq_exchange_type = 'direct',
rabbitmq_num_consumers = 2,
rabbitmq_flush_interval_ms=1000,
rabbitmq_max_block_size = 1000,
rabbitmq_num_queues = 2,
rabbitmq_max_block_size = 100,
rabbitmq_routing_key_list = 'over',
rabbitmq_format = 'TSV',
rabbitmq_row_delimiter = '\\n';
@ -1045,8 +1044,6 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
rabbitmq_exchange_name = 'over',
rabbitmq_exchange_type = 'direct',
rabbitmq_routing_key_list = 'over',
rabbitmq_flush_interval_ms=1000,
rabbitmq_max_block_size = 1000,
rabbitmq_format = 'TSV',
rabbitmq_row_delimiter = '\\n';
CREATE TABLE test.view_overload (key UInt64, value UInt64)
@ -1087,6 +1084,9 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
time.sleep(random.uniform(0, 1))
thread.start()
for thread in threads:
thread.join()
while True:
result = instance.query("SELECT count() FROM test.view_overload")
expected = messages_num * threads_num
@ -1097,16 +1097,13 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
instance.query(
"""
DROP TABLE test.consumer_overload;
DROP TABLE test.view_overload;
DROP TABLE test.rabbitmq_consume;
DROP TABLE test.rabbitmq_overload;
DROP TABLE test.consumer_overload NO DELAY;
DROP TABLE test.view_overload NO DELAY;
DROP TABLE test.rabbitmq_consume NO DELAY;
DROP TABLE test.rabbitmq_overload NO DELAY;
"""
)
for thread in threads:
thread.join()
assert (
int(result) == messages_num * threads_num
), "ClickHouse lost some messages: {}".format(result)
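The RabbitMQ hunk above joins the producer threads before polling the view and drops the tables with NO DELAY. A minimal sketch of the join-then-poll order; produce_and_wait is hypothetical, and instance.query stands in for the integration-test node API used above:

import threading
import time

def produce_and_wait(instance, produce, threads_num, expected, poll=1.0):
    # Start all producers and join them before checking the result, as the hunk
    # above does, so every INSERT has at least been issued before the count is
    # compared against messages_num * threads_num.
    threads = [threading.Thread(target=produce) for _ in range(threads_num)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    while True:
        result = int(instance.query("SELECT count() FROM test.view_overload"))
        if result >= expected:
            return result
        time.sleep(poll)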

View File

@ -4,6 +4,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -15,7 +16,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
send -- "DROP TABLE IF EXISTS test_01179\r"
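This and the following expect-script hunks all make the same change: each test derives its own history file and passes it with --history_file, so runs no longer share state. A rough Python rendering of the naming scheme (nothing below is part of the test suite):

import os

# Tcl: set history_file $env(CLICKHOUSE_TMP)/$basename.history
basename = os.path.basename(__file__)
history_file = os.path.join(os.environ.get("CLICKHOUSE_TMP", "/tmp"), basename + ".history")

# Passed to the client as --history_file=..., so parallel runs never touch the
# shared default history and the "no-parallel" tag on these tests can be dropped.
print(f"--history_file={history_file}")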

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# Make a query with syntax error

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 10
@ -18,7 +19,7 @@ expect_after {
# useful debugging configuration
# exp_internal 1
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
send -- "SELECT 1\r"
@ -60,7 +61,7 @@ expect ":) "
send -- ""
expect eof
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0 --multiline"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0 --multiline --history_file=$history_file"
expect ":) "
send -- "SELECT 1;\r"

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
send -- "SELECT 1\r"

View File

@ -4,6 +4,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -15,7 +16,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# Make a query
@ -28,7 +29,7 @@ exec kill -9 [exp_pid]
close
# Run client one more time and press "up" to see the last recorded query
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file"
expect ":) "
send -- "\[A"
expect "for the history"

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file"
expect ":) "
# Make a query

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# Make a query

View File

@ -7,6 +7,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,27 +15,35 @@ match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file"
expect "\n:) "
send -- "DROP TABLE IF EXISTS t01565;\n"
# NOTE: this is important for -mn mode: send "\r" only after reading the echoed command
expect "DROP"
send -- "\r"
expect "\nOk."
expect "\n:)"
send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n"
expect "CREATE"
send -- "\r"
expect "\nOk."
expect "\n:) "
send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n"
expect "\nConnected"
expect "INSERT"
send -- "\r"
expect "\n:) "
send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n"
expect "INSERT"
send -- "\r"
expect "\nOk."
expect "\n:) "
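The NOTE above is the key detail: in -mn mode the client echoes the typed statement, so the trailing \r must be sent only after the echo has been read. A pexpect sketch of the same interaction (assuming clickhouse-client on PATH; the real test goes through shell_config.sh):

import os
import pexpect

history_file = os.path.join(os.environ.get("CLICKHOUSE_TMP", "/tmp"), "t01565.history")
client = pexpect.spawn(
    f"clickhouse-client --disable_suggestion -mn --history_file={history_file}",
    timeout=60,
    encoding="utf-8",
)
client.expect(r"\n:\) ")
client.send("DROP TABLE IF EXISTS t01565;\n")
client.expect("DROP")   # wait for the echoed statement first ...
client.send("\r")       # ... and only then submit it, as the NOTE says
client.expect(r"Ok\.")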

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# regression for heap-buffer-overflow issue (under ASAN)

View File

@ -1,10 +1,9 @@
#!/usr/bin/expect -f
# Tags: no-parallel
# Tag no-parallel: Uses non unique history file
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -16,17 +15,17 @@ expect_after {
timeout { exit 1 }
}
exec bash -c "echo select 1 > $argv0.txt"
exec bash -c "echo select 1 >> $argv0.txt"
exec bash -c "echo select 1 >> $argv0.txt"
exec bash -c "echo select 1 > $history_file.txt"
exec bash -c "echo select 1 >> $history_file.txt"
exec bash -c "echo select 1 >> $history_file.txt"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$argv0.txt"
expect "The history file ($argv0.txt) is in old format. 3 lines, 1 unique lines."
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file.txt"
expect "The history file ($history_file.txt) is in old format. 3 lines, 1 unique lines."
expect ":) "
send -- "\4"
expect eof
spawn bash -c "wc -l $argv0.txt"
spawn bash -c "wc -l $history_file.txt"
# The following lines are expected:
#
# ### YYYY-MM-DD HH:MM:SS.SSS
@ -35,4 +34,4 @@ spawn bash -c "wc -l $argv0.txt"
expect "2"
expect eof
exec bash -c "rm $argv0.txt"
exec bash -c "rm $history_file.txt"

View File

@ -6,6 +6,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -20,17 +21,17 @@ expect_after {
set Debug_type 0
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# Check debug type
send -- "SELECT value FROM system.build_options WHERE name='BUILD_TYPE'\r"
send -- "SELECT lower(value) FROM system.build_options WHERE name='BUILD_TYPE'\r"
expect {
"Debug" {
set Debug_type 1
expect ":) "
"debug" {
set Debug_type 1
expect ":) "
}
"RelWithDebInfo"
"relwithdebinfo"
}
send -- "q\r"
@ -38,7 +39,7 @@ expect eof
if { $Debug_type > 0} {
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect "Warnings:"
expect " * Server was built in debug mode. It will work slowly."
expect ":) "
@ -52,7 +53,7 @@ send -- "q\r"
expect eof
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
expect "Warnings:"
expect " * Some obsolete setting is changed."
expect ":) "
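The hunk above compares lower(value) against "debug"/"relwithdebinfo" so the check no longer depends on how the build type is capitalized; the same comparison as a tiny Python helper:

def is_debug_build(build_type: str) -> bool:
    # "Debug", "debug" and "DEBUG" all count as a debug build; "RelWithDebInfo" does not.
    return build_type.lower() == "debug"

assert is_debug_build("Debug")
assert not is_debug_build("RelWithDebInfo")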

View File

@ -1,5 +1,5 @@
#!/usr/bin/expect -f
# Tags: no-parallel, no-fasttest
# Tags: no-fasttest
# This is a test for system.warnings. Testing in interactive mode is necessary,
# as we want to see certain warnings from client
@ -7,6 +7,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -22,7 +23,7 @@ expect_after {
#
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file"
expect ":) "
send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r"
@ -37,7 +38,7 @@ expect eof
#
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file"
expect ":) "
send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r"
@ -52,7 +53,7 @@ expect eof
#
# Check that the query will not fail (due to max_untracked_memory)
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file"
expect ":) "
send -- "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000\r"

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 20
@ -15,7 +16,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
send -- "DROP TABLE IF EXISTS test_02047\r"

View File

@ -3,9 +3,10 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 02
set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
@ -14,7 +15,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# Send a command

View File

@ -4,6 +4,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -16,7 +17,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect -re "ClickHouse client version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\r"
expect -re "Connecting to database .* at localhost:9000 as user default.\r"

View File

@ -3,9 +3,10 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 3
set timeout 60
match_max 100000
expect_after {
@ -18,7 +19,7 @@ expect_after {
# useful debugging configuration
# exp_internal 1
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0 --history_file=$history_file"
expect ":) "
# Make a query
@ -26,6 +27,7 @@ send -- "SELECT 1\r"
expect "1"
expect ":) "
send -- "SELECT 2"
# NOTE: it does not work for alacritty with TERM=xterm
send -- "\033\[A"
expect "SELECT 1"
send -- "\033\[B"

View File

@ -3,6 +3,7 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 60
@ -15,7 +16,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file"
expect ":) "
# Make a query

View File

@ -0,0 +1,3 @@
Ok Ok Ok Ok Ok Ok
Ok Ok
Ok Ok Ok

View File

@ -0,0 +1,50 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt64, val UInt64) engine = MergeTree Order by key PARTITION BY key >= 128;
SET max_block_size = 64, max_insert_block_size = 64, min_insert_block_size_rows = 64;
INSERT INTO test SELECT number AS key, sipHash64(number) AS val FROM numbers(512);
SYSTEM FLUSH LOGS;
SELECT
if(count(DISTINCT query_id) == 1, 'Ok', 'Error: ' || toString(count(DISTINCT query_id))),
if(count() == 512 / 64, 'Ok', 'Error: ' || toString(count())), -- 512 rows inserted, 64 rows per block
if(SUM(ProfileEvents['MergeTreeDataWriterRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterRows']))),
if(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']))),
if(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']))),
if(SUM(ProfileEvents['MergeTreeDataWriterBlocks']) >= 8, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterBlocks'])))
FROM system.part_log
WHERE event_time > now() - INTERVAL 10 MINUTE
AND database == currentDatabase() AND table == 'test'
AND event_type == 'NewPart'
;
OPTIMIZE TABLE test FINAL;
SYSTEM FLUSH LOGS;
SELECT
if(count() > 2, 'Ok', 'Error: ' || toString(count())),
if(SUM(ProfileEvents['MergedRows']) >= 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows'])))
FROM system.part_log
WHERE event_time > now() - INTERVAL 10 MINUTE
AND database == currentDatabase() AND table == 'test'
AND event_type == 'MergeParts'
;
ALTER TABLE test UPDATE val = 0 WHERE key % 2 == 0 SETTINGS mutations_sync = 2;
SYSTEM FLUSH LOGS;
SELECT
if(count() == 2, 'Ok', 'Error: ' || toString(count())),
if(SUM(ProfileEvents['MergedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows']))),
if(SUM(ProfileEvents['FileOpen']) > 1, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['FileOpen'])))
FROM system.part_log
WHERE event_time > now() - INTERVAL 10 MINUTE
AND database == currentDatabase() AND table == 'test'
AND event_type == 'MutatePart'
;
DROP TABLE test;
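A sketch of the same part_log check outside clickhouse-test, assuming the clickhouse-driver package and a local server that still has the 'test' table from the statements above:

from clickhouse_driver import Client

client = Client("localhost")
client.execute("SYSTEM FLUSH LOGS")

# Every NewPart entry in system.part_log now carries the ProfileEvents collected
# while the part was written, which is what the SELECT above asserts on.
rows = client.execute(
    "SELECT sum(ProfileEvents['MergeTreeDataWriterRows']),"
    " sum(ProfileEvents['MergeTreeDataWriterBlocks'])"
    " FROM system.part_log"
    " WHERE database = currentDatabase() AND table = 'test' AND event_type = 'NewPart'"
)
print(rows)  # e.g. [(512, 8)] for the 512-row insert above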

View File

@ -0,0 +1,40 @@
-- Tags: long, replica, no-replicated-database, no-parallel
DROP TABLE IF EXISTS part_log_profile_events_r1 NO DELAY;
DROP TABLE IF EXISTS part_log_profile_events_r2 NO DELAY;
CREATE TABLE part_log_profile_events_r1 (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02378/part_log_profile_events', 'r1')
ORDER BY x
PARTITION BY x >= 128
;
CREATE TABLE part_log_profile_events_r2 (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02378/part_log_profile_events', 'r2')
ORDER BY x
PARTITION BY x >= 128
;
-- SYSTEM STOP MERGES part_log_profile_events_r1;
-- SYSTEM STOP MERGES part_log_profile_events_r2;
SET max_block_size = 64, max_insert_block_size = 64, min_insert_block_size_rows = 64;
INSERT INTO part_log_profile_events_r1 SELECT number FROM numbers(1000);
SYSTEM SYNC REPLICA part_log_profile_events_r2;
SYSTEM FLUSH LOGS;
SELECT
count() > 1
AND SUM(ProfileEvents['ZooKeeperTransactions']) >= 4
FROM system.part_log
WHERE event_time > now() - INTERVAL 10 MINUTE
AND database == currentDatabase() AND table == 'part_log_profile_events_r2'
AND event_type == 'DownloadPart'
;
DROP TABLE part_log_profile_events_r1 NO DELAY;
DROP TABLE part_log_profile_events_r2 NO DELAY;

View File

@ -3,9 +3,10 @@
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
set history_file $env(CLICKHOUSE_TMP)/$basename.history
log_user 0
set timeout 10
set timeout 60
match_max 100000
expect_after {
@ -15,7 +16,7 @@ expect_after {
timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion"
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
expect ":) "
# -----------------------------------------

View File

@ -0,0 +1,169 @@
-- { echoOn }
SYSTEM STOP MERGES tbl;
-- simple test case
create table if not exists replacing_mt (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into replacing_mt values ('abc');
insert into replacing_mt values ('abc');
-- expected output is 2 because final is turned off
select count() from replacing_mt;
2
set final = 1;
-- expected output is 1 because final is turned on
select count() from replacing_mt;
1
-- JOIN test cases
create table if not exists lhs (x String) engine=ReplacingMergeTree() ORDER BY x;
create table if not exists rhs (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into lhs values ('abc');
insert into lhs values ('abc');
insert into rhs values ('abc');
insert into rhs values ('abc');
set final = 0;
-- expected output is 4 because select_final == 0
select count() from lhs inner join rhs on lhs.x = rhs.x;
4
set final = 1;
-- expected output is 1 because final == 1
select count() from lhs inner join rhs on lhs.x = rhs.x;
1
-- regular non final table
set final = 1;
create table if not exists regular_mt_table (x String) engine=MergeTree() ORDER BY x;
insert into regular_mt_table values ('abc');
insert into regular_mt_table values ('abc');
-- expected output is 2, it should silently ignore final modifier
select count() from regular_mt_table;
2
-- view test
create materialized VIEW mv_regular_mt_table TO regular_mt_table AS SELECT * FROM regular_mt_table;
create view nv_regular_mt_table AS SELECT * FROM mv_regular_mt_table;
set final=1;
select count() from nv_regular_mt_table;
2
-- join on mix of tables that support / do not support select final with explain
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table values (1,'a');
insert into right_table values (1,'b');
insert into right_table values (1,'c');
-- expected output
-- 1 c a c
-- 1 c b c
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c a c
1 c b c
explain syntax select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
SELECT
`--left_table.id` AS `left_table.id`,
val_left,
val_middle,
val_right
FROM
(
SELECT
val_left,
id AS `--left_table.id`,
val_middle,
middle_table.id AS `--middle_table.id`
FROM left_table
FINAL
ALL INNER JOIN
(
SELECT
id,
val_middle
FROM middle_table
) AS middle_table ON `--left_table.id` = `--middle_table.id`
) AS `--.s`
ALL INNER JOIN
(
SELECT
id,
val_right
FROM right_table
FINAL
) AS right_table ON `--middle_table.id` = id
ORDER BY
`--left_table.id` ASC,
val_left ASC,
val_middle ASC,
val_right ASC
-- extra: same with subquery
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c a c
1 c b c
-- distributed tables
drop table if exists left_table;
drop table if exists middle_table;
drop table if exists right_table;
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table_local (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists right_table engine=Distributed('test_shard_localhost', currentDatabase(), right_table_local) AS right_table_local;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table_local values (1,'a');
insert into right_table_local values (1,'b');
insert into right_table_local values (1,'c');
SET prefer_localhost_replica=0;
-- expected output:
-- 1 c 1 a 1 c
-- 1 c 1 b 1 c
select left_table.*,middle_table.*, right_table.* from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c 1 a 1 c
1 c 1 b 1 c
SET prefer_localhost_replica=1;
-- expected output:
-- 1 c 1 a 1 c
-- 1 c 1 b 1 c
select left_table.*,middle_table.*, right_table.* from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c 1 a 1 c
1 c 1 b 1 c
-- Quite exotic with Merge engine
DROP TABLE IF EXISTS table_to_merge_a;
DROP TABLE IF EXISTS table_to_merge_b;
DROP TABLE IF EXISTS table_to_merge_c;
DROP TABLE IF EXISTS merge_table;
create table if not exists table_to_merge_a (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists table_to_merge_b (id UInt64, val String) engine=MergeTree() ORDER BY id;
create table if not exists table_to_merge_c (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
CREATE TABLE merge_table Engine=Merge(currentDatabase(), '^(table_to_merge_[a-z])$') AS table_to_merge_a;
insert into table_to_merge_a values (1,'a');
insert into table_to_merge_a values (1,'b');
insert into table_to_merge_a values (1,'c');
insert into table_to_merge_b values (2,'a');
insert into table_to_merge_b values (2,'b');
insert into table_to_merge_c values (3,'a');
insert into table_to_merge_c values (3,'b');
insert into table_to_merge_c values (3,'c');
-- expected output:
-- 1 c, 2 a, 2 b, 3 c
SELECT * FROM merge_table ORDER BY id, val;
1 c
2 a
2 b
3 c
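The reference above exercises the new final setting on ReplacingMergeTree. A standalone sketch with clickhouse-driver (assumed available; replacing_demo is a throwaway table, not one from the test):

from clickhouse_driver import Client

client = Client("localhost")
client.execute("DROP TABLE IF EXISTS replacing_demo")
client.execute("CREATE TABLE replacing_demo (x String) ENGINE = ReplacingMergeTree ORDER BY x")
client.execute("INSERT INTO replacing_demo VALUES ('abc')")
client.execute("INSERT INTO replacing_demo VALUES ('abc')")

# Without the setting both inserted rows are visible: count() == 2.
print(client.execute("SELECT count() FROM replacing_demo"))
# With final = 1 the SELECT behaves as if FINAL were written: count() == 1.
print(client.execute("SELECT count() FROM replacing_demo", settings={"final": 1}))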

View File

@ -0,0 +1,137 @@
-- { echoOn }
SYSTEM STOP MERGES tbl;
-- simple test case
create table if not exists replacing_mt (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into replacing_mt values ('abc');
insert into replacing_mt values ('abc');
-- expected output is 2 because final is turned off
select count() from replacing_mt;
set final = 1;
-- expected output is 1 because final is turned on
select count() from replacing_mt;
-- JOIN test cases
create table if not exists lhs (x String) engine=ReplacingMergeTree() ORDER BY x;
create table if not exists rhs (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into lhs values ('abc');
insert into lhs values ('abc');
insert into rhs values ('abc');
insert into rhs values ('abc');
set final = 0;
-- expected output is 4 because select_final == 0
select count() from lhs inner join rhs on lhs.x = rhs.x;
set final = 1;
-- expected output is 1 because final == 1
select count() from lhs inner join rhs on lhs.x = rhs.x;
-- regular non final table
set final = 1;
create table if not exists regular_mt_table (x String) engine=MergeTree() ORDER BY x;
insert into regular_mt_table values ('abc');
insert into regular_mt_table values ('abc');
-- expected output is 2, it should silently ignore final modifier
select count() from regular_mt_table;
-- view test
create materialized VIEW mv_regular_mt_table TO regular_mt_table AS SELECT * FROM regular_mt_table;
create view nv_regular_mt_table AS SELECT * FROM mv_regular_mt_table;
set final=1;
select count() from nv_regular_mt_table;
-- join on mix of tables that support / do not support select final with explain
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table values (1,'a');
insert into right_table values (1,'b');
insert into right_table values (1,'c');
-- expected output
-- 1 c a c
-- 1 c b c
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
explain syntax select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
-- extra: same with subquery
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id
ORDER BY left_table.id, val_left, val_middle, val_right;
-- distributed tables
drop table if exists left_table;
drop table if exists middle_table;
drop table if exists right_table;
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table_local (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists right_table engine=Distributed('test_shard_localhost', currentDatabase(), right_table_local) AS right_table_local;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table_local values (1,'a');
insert into right_table_local values (1,'b');
insert into right_table_local values (1,'c');
SET prefer_localhost_replica=0;
-- expected output:
-- 1 c 1 a 1 c
-- 1 c 1 b 1 c
select left_table.*,middle_table.*, right_table.* from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
SET prefer_localhost_replica=1;
-- expected output:
-- 1 c 1 a 1 c
-- 1 c 1 b 1 c
select left_table.*,middle_table.*, right_table.* from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
-- Quite exotic with Merge engine
DROP TABLE IF EXISTS table_to_merge_a;
DROP TABLE IF EXISTS table_to_merge_b;
DROP TABLE IF EXISTS table_to_merge_c;
DROP TABLE IF EXISTS merge_table;
create table if not exists table_to_merge_a (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists table_to_merge_b (id UInt64, val String) engine=MergeTree() ORDER BY id;
create table if not exists table_to_merge_c (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
CREATE TABLE merge_table Engine=Merge(currentDatabase(), '^(table_to_merge_[a-z])$') AS table_to_merge_a;
insert into table_to_merge_a values (1,'a');
insert into table_to_merge_a values (1,'b');
insert into table_to_merge_a values (1,'c');
insert into table_to_merge_b values (2,'a');
insert into table_to_merge_b values (2,'b');
insert into table_to_merge_c values (3,'a');
insert into table_to_merge_c values (3,'b');
insert into table_to_merge_c values (3,'c');
-- expected output:
-- 1 c, 2 a, 2 b, 3 c
SELECT * FROM merge_table ORDER BY id, val;

View File

@ -0,0 +1,137 @@
-- { echoOn }
set allow_experimental_analyzer=1;
SYSTEM STOP MERGES tbl;
-- simple test case
create table if not exists replacing_mt (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into replacing_mt values ('abc');
insert into replacing_mt values ('abc');
-- expected output is 2 because final is turned off
select count() from replacing_mt;
2
set final = 1;
-- expected output is 1 because final is turned on
select count() from replacing_mt;
1
-- JOIN test cases
create table if not exists lhs (x String) engine=ReplacingMergeTree() ORDER BY x;
create table if not exists rhs (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into lhs values ('abc');
insert into lhs values ('abc');
insert into rhs values ('abc');
insert into rhs values ('abc');
set final = 0;
-- expected output is 4 because select_final == 0
select count() from lhs inner join rhs on lhs.x = rhs.x;
4
set final = 1;
-- expected output is 1 because final == 1
select count() from lhs inner join rhs on lhs.x = rhs.x;
1
-- regular non final table
set final = 1;
create table if not exists regular_mt_table (x String) engine=MergeTree() ORDER BY x;
insert into regular_mt_table values ('abc');
insert into regular_mt_table values ('abc');
-- expected output is 2, it should silently ignore final modifier
select count() from regular_mt_table;
2
-- view test
create materialized VIEW mv_regular_mt_table TO regular_mt_table AS SELECT * FROM regular_mt_table;
create view nv_regular_mt_table AS SELECT * FROM mv_regular_mt_table;
set final=1;
select count() from nv_regular_mt_table;
2
-- join on mix of tables that support / do not support select final with explain
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table values (1,'a');
insert into right_table values (1,'b');
insert into right_table values (1,'c');
-- expected output
-- 1 c a c
-- 1 c b c
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c a c
1 c b c
explain syntax select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
SELECT
`--left_table.id` AS `left_table.id`,
val_left,
val_middle,
val_right
FROM
(
SELECT
val_left,
id AS `--left_table.id`,
val_middle,
middle_table.id AS `--middle_table.id`
FROM left_table
FINAL
ALL INNER JOIN
(
SELECT
id,
val_middle
FROM middle_table
) AS middle_table ON `--left_table.id` = `--middle_table.id`
) AS `--.s`
ALL INNER JOIN
(
SELECT
id,
val_right
FROM right_table
FINAL
) AS right_table ON `--middle_table.id` = id
ORDER BY
`--left_table.id` ASC,
val_left ASC,
val_middle ASC,
val_right ASC
-- extra: same with subquery
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id
ORDER BY left_table.id, val_left, val_middle, val_right;
1 c a c
1 c b c
-- no distributed tests because it is not currently supported:
-- JOIN with remote storages is unsupported.
-- Quite exotic with Merge engine
DROP TABLE IF EXISTS table_to_merge_a;
DROP TABLE IF EXISTS table_to_merge_b;
DROP TABLE IF EXISTS table_to_merge_c;
DROP TABLE IF EXISTS merge_table;
create table if not exists table_to_merge_a (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists table_to_merge_b (id UInt64, val String) engine=MergeTree() ORDER BY id;
create table if not exists table_to_merge_c (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
CREATE TABLE merge_table Engine=Merge(currentDatabase(), '^(table_to_merge_[a-z])$') AS table_to_merge_a;
insert into table_to_merge_a values (1,'a');
insert into table_to_merge_a values (1,'b');
insert into table_to_merge_a values (1,'c');
insert into table_to_merge_b values (2,'a');
insert into table_to_merge_b values (2,'b');
insert into table_to_merge_c values (3,'a');
insert into table_to_merge_c values (3,'b');
insert into table_to_merge_c values (3,'c');
-- expected output:
-- 1 c, 2 a, 2 b, 3 c
SELECT * FROM merge_table ORDER BY id, val;
1 c
2 a
2 b
3 c

View File

@ -0,0 +1,107 @@
-- { echoOn }
set allow_experimental_analyzer=1;
SYSTEM STOP MERGES tbl;
-- simple test case
create table if not exists replacing_mt (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into replacing_mt values ('abc');
insert into replacing_mt values ('abc');
-- expected output is 2 because final is turned off
select count() from replacing_mt;
set final = 1;
-- expected output is 1 because final is turned on
select count() from replacing_mt;
-- JOIN test cases
create table if not exists lhs (x String) engine=ReplacingMergeTree() ORDER BY x;
create table if not exists rhs (x String) engine=ReplacingMergeTree() ORDER BY x;
insert into lhs values ('abc');
insert into lhs values ('abc');
insert into rhs values ('abc');
insert into rhs values ('abc');
set final = 0;
-- expected output is 4 because select_final == 0
select count() from lhs inner join rhs on lhs.x = rhs.x;
set final = 1;
-- expected output is 1 because final == 1
select count() from lhs inner join rhs on lhs.x = rhs.x;
-- regular non final table
set final = 1;
create table if not exists regular_mt_table (x String) engine=MergeTree() ORDER BY x;
insert into regular_mt_table values ('abc');
insert into regular_mt_table values ('abc');
-- expected output is 2, it should silently ignore final modifier
select count() from regular_mt_table;
-- view test
create materialized VIEW mv_regular_mt_table TO regular_mt_table AS SELECT * FROM regular_mt_table;
create view nv_regular_mt_table AS SELECT * FROM mv_regular_mt_table;
set final=1;
select count() from nv_regular_mt_table;
-- join on mix of tables that support / do not support select final with explain
create table if not exists left_table (id UInt64, val_left String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists middle_table (id UInt64, val_middle String) engine=MergeTree() ORDER BY id;
create table if not exists right_table (id UInt64, val_right String) engine=ReplacingMergeTree() ORDER BY id;
insert into left_table values (1,'a');
insert into left_table values (1,'b');
insert into left_table values (1,'c');
insert into middle_table values (1,'a');
insert into middle_table values (1,'b');
insert into right_table values (1,'a');
insert into right_table values (1,'b');
insert into right_table values (1,'c');
-- expected output
-- 1 c a c
-- 1 c b c
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
explain syntax select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join right_table on middle_table.id = right_table.id
ORDER BY left_table.id, val_left, val_middle, val_right;
-- extra: same with subquery
select left_table.id,val_left, val_middle, val_right from left_table
inner join middle_table on left_table.id = middle_table.id
inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id
ORDER BY left_table.id, val_left, val_middle, val_right;
-- no distributed tests because it is not currently supported:
-- JOIN with remote storages is unsupported.
-- Quite exotic with Merge engine
DROP TABLE IF EXISTS table_to_merge_a;
DROP TABLE IF EXISTS table_to_merge_b;
DROP TABLE IF EXISTS table_to_merge_c;
DROP TABLE IF EXISTS merge_table;
create table if not exists table_to_merge_a (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
create table if not exists table_to_merge_b (id UInt64, val String) engine=MergeTree() ORDER BY id;
create table if not exists table_to_merge_c (id UInt64, val String) engine=ReplacingMergeTree() ORDER BY id;
CREATE TABLE merge_table Engine=Merge(currentDatabase(), '^(table_to_merge_[a-z])$') AS table_to_merge_a;
insert into table_to_merge_a values (1,'a');
insert into table_to_merge_a values (1,'b');
insert into table_to_merge_a values (1,'c');
insert into table_to_merge_b values (2,'a');
insert into table_to_merge_b values (2,'b');
insert into table_to_merge_c values (3,'a');
insert into table_to_merge_c values (3,'b');
insert into table_to_merge_c values (3,'c');
-- expected output:
-- 1 c, 2 a, 2 b, 3 c
SELECT * FROM merge_table ORDER BY id, val;

View File

@ -0,0 +1,73 @@
-- { echoOn }
SELECT cast(id1 as UInt16) AS id16 FROM test_02559 PREWHERE id16 and (id2 % 40000) LIMIT 10;
1
2
3
4
5
6
7
8
9
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond LIMIT 10;
1 1 1
2 2 1
3 3 1
4 4 1
5 5 1
6 6 1
7 7 1
8 8 1
9 9 1
SELECT cast(id1 as UInt16) AS cond1, (if(id2 > 3, id2, NULL) % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond LIMIT 10;
4 4 1
5 5 1
6 6 1
7 7 1
8 8 1
9 9 1
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond AND id2 > 4 LIMIT 10;
5 5 1
6 6 1
7 7 1
8 8 1
9 9 1
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE id2 > 5 AND cond LIMIT 10;
6 6 1
7 7 1
8 8 1
9 9 1
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond1 AND id2 > 6 AND cond2 LIMIT 10;
7 7 1
8 8 1
9 9 1
SELECT cast(id1 as UInt16) AS cond1 FROM test_02559 PREWHERE cond1 LIMIT 10; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
SELECT * FROM test_02559 PREWHERE id1 <= 3 AND id2 > 0 WHERE (id1 + id2 < 15) LIMIT 10;
1 1
2 2
3 3
SELECT count() FROM test_02559 PREWHERE id2>=0 AND (1 OR ignore(id1)) WHERE ignore(id1)=0;
10
SELECT count() FROM test_02559 PREWHERE ignore(id1);
0
SELECT count() FROM test_02559 PREWHERE 1 OR ignore(id1);
10
SELECT count() FROM test_02559 PREWHERE ignore(id1) AND id2 > 0;
0
SELECT count() FROM test_02559 PREWHERE (1 OR ignore(id1)) AND id2 > 0;
9
SELECT count() FROM test_02559 PREWHERE (id1 <= 10 AND id2 > 0) AND ignore(id1);
0
SELECT count() FROM test_02559 PREWHERE ignore(id1) AND (id1 <= 10 AND id2 > 0);
0
SELECT count() FROM test_02559 PREWHERE (id1 <= 10 AND id2 > 0) AND (1 OR ignore(id1));
9
SELECT count() FROM test_02559 PREWHERE (1 OR ignore(id1)) AND (id1 <= 10 AND id2 > 0);
9
CREATE ROW POLICY 02559_filter_1 ON test_02559 USING id2=2 AS permissive TO ALL;
SELECT * FROM test_02559;
2 2
CREATE ROW POLICY 02559_filter_2 ON test_02559 USING id2<=2 AS restrictive TO ALL;
SELECT * FROM test_02559;
2 2

View File

@ -0,0 +1,59 @@
DROP TABLE IF EXISTS test_02559;
CREATE TABLE test_02559 (id1 UInt64, id2 UInt64) ENGINE=MergeTree ORDER BY id1;
INSERT INTO test_02559 SELECT number, number FROM numbers(10);
DROP ROW POLICY IF EXISTS 02559_filter_1 ON test_02559;
DROP ROW POLICY IF EXISTS 02559_filter_2 ON test_02559;
SET enable_multiple_prewhere_read_steps=true, move_all_conditions_to_prewhere=true;
-- { echoOn }
SELECT cast(id1 as UInt16) AS id16 FROM test_02559 PREWHERE id16 and (id2 % 40000) LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1, (if(id2 > 3, id2, NULL) % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond AND id2 > 4 LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE id2 > 5 AND cond LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1, (id2 % 40000) AS cond2, (cond1 AND cond2) AS cond FROM test_02559 PREWHERE cond1 AND id2 > 6 AND cond2 LIMIT 10;
SELECT cast(id1 as UInt16) AS cond1 FROM test_02559 PREWHERE cond1 LIMIT 10; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
SELECT * FROM test_02559 PREWHERE id1 <= 3 AND id2 > 0 WHERE (id1 + id2 < 15) LIMIT 10;
SELECT count() FROM test_02559 PREWHERE id2>=0 AND (1 OR ignore(id1)) WHERE ignore(id1)=0;
SELECT count() FROM test_02559 PREWHERE ignore(id1);
SELECT count() FROM test_02559 PREWHERE 1 OR ignore(id1);
SELECT count() FROM test_02559 PREWHERE ignore(id1) AND id2 > 0;
SELECT count() FROM test_02559 PREWHERE (1 OR ignore(id1)) AND id2 > 0;
SELECT count() FROM test_02559 PREWHERE (id1 <= 10 AND id2 > 0) AND ignore(id1);
SELECT count() FROM test_02559 PREWHERE ignore(id1) AND (id1 <= 10 AND id2 > 0);
SELECT count() FROM test_02559 PREWHERE (id1 <= 10 AND id2 > 0) AND (1 OR ignore(id1));
SELECT count() FROM test_02559 PREWHERE (1 OR ignore(id1)) AND (id1 <= 10 AND id2 > 0);
CREATE ROW POLICY 02559_filter_1 ON test_02559 USING id2=2 AS permissive TO ALL;
SELECT * FROM test_02559;
CREATE ROW POLICY 02559_filter_2 ON test_02559 USING id2<=2 AS restrictive TO ALL;
SELECT * FROM test_02559;
-- { echoOff }
DROP ROW POLICY IF EXISTS 02559_filter_1 ON test_02559;
DROP ROW POLICY IF EXISTS 02559_filter_2 ON test_02559;
DROP TABLE test_02559;
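A sketch of running one of the queries above with the two settings the test turns on, again via clickhouse-driver (assumed available) against the test_02559 table created at the top of this file:

from clickhouse_driver import Client

client = Client("localhost")
settings = {
    "enable_multiple_prewhere_read_steps": 1,
    "move_all_conditions_to_prewhere": 1,
}
# With both settings on, AND-ed PREWHERE conditions may be split into several
# read steps, so later columns are read only for rows that pass earlier filters.
print(client.execute(
    "SELECT count() FROM test_02559 PREWHERE (id1 <= 10 AND id2 > 0) AND (1 OR ignore(id1))",
    settings=settings,
))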

Some files were not shown because too many files have changed in this diff.