diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 506ed451b6d..afc08f3e637 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -46,7 +46,12 @@ jobs: - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" + echo "Testing the main ci directory" python3 -m unittest discover -s . -p '*_test.py' + for dir in *_lambda/; do + echo "Testing $dir" + python3 -m unittest discover -s "$dir" -p '*_test.py' + done DockerHubPushAarch64: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 214f2d550b4..a1f2eb9d9ec 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -626,7 +626,9 @@ if args.report == "main": message_array.append(str(faster_queries) + " faster") if slower_queries: - if slower_queries > 3: + # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225 + # False positives rate should be < 1%: https://shorturl.at/CDEK8 + if slower_queries > 5: status = "failure" message_array.append(str(slower_queries) + " slower") diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 2b8b43802ea..62191d9b5e4 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -109,7 +109,7 @@ INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) VALUES (1667446031, 1, 6, 3) ``` -The data are inserted in both the table and the materialized view `test.mv_visits`. +The data is inserted in both the table and the materialized view `test.mv_visits`. To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 9730faa873c..cac75fdc45a 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -75,7 +75,7 @@ SELECT payment_type, pickup_ntaname, dropoff_ntaname -FROM s3( +FROM gcs( 'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz', 'TabSeparatedWithNames' ); diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5730503a670..5b0c6b3c8c2 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -227,6 +227,89 @@ SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_ SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok. ``` +## ignore_data_skipping_indices {#settings-ignore_data_skipping_indices} + +Ignores the skipping indexes specified if used by the query. 
+ +Consider the following example: + +```sql +CREATE TABLE data +( + key Int, + x Int, + y Int, + INDEX x_idx x TYPE minmax GRANULARITY 1, + INDEX y_idx y TYPE minmax GRANULARITY 1, + INDEX xy_idx (x,y) TYPE minmax GRANULARITY 1 +) +Engine=MergeTree() +ORDER BY key; + +INSERT INTO data VALUES (1, 2, 3); + +SELECT * FROM data; +SELECT * FROM data SETTINGS ignore_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error. +SELECT * FROM data SETTINGS ignore_data_skipping_indices='x_idx'; -- Ok. +SELECT * FROM data SETTINGS ignore_data_skipping_indices='na_idx'; -- Ok. + +SELECT * FROM data WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- query will produce INDEX_NOT_USED error, since xy_idx is explictly ignored. +SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx'; +``` + +The query without ignoring any indexes: +```sql +EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2; + +Expression ((Projection + Before ORDER BY)) + Filter (WHERE) + ReadFromMergeTree (default.data) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 +``` + +Ignoring the `xy_idx` index: +```sql +EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx'; + +Expression ((Projection + Before ORDER BY)) + Filter (WHERE) + ReadFromMergeTree (default.data) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 +``` + Works with tables in the MergeTree family. ## convert_query_to_cnf {#convert_query_to_cnf} diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 8951ac4ee6a..5d2229fbcce 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -4,7 +4,7 @@ sidebar_label: Aggregate Functions sidebar_position: 33 --- -# Aggregate Functions +# Aggregate Functions Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts. @@ -72,3 +72,16 @@ FROM t_null_big │ 2.3333333333333335 │ 1.4 │ └────────────────────┴─────────────────────┘ ``` + +Also you can use [Tuple](/docs/en/sql-reference/data-types/tuple.md) to work around NULL skipping behavior. The a `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value. 
+ +```sql +SELECT + groupArray(y), + groupArray(tuple(y)).1 +FROM t_null_big; + +┌─groupArray(y)─┬─tupleElement(groupArray(tuple(y)), 1)─┐ +│ [2,2,3] │ [2,NULL,2,3,NULL] │ +└───────────────┴───────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 65c43ab04c0..8f10318838b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -6,6 +6,7 @@ sidebar_position: 106 # argMax Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. +Both parts the `arg` and the `max` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. **Syntax** @@ -49,3 +50,60 @@ Result: │ director │ └──────────────────────┘ ``` + +**Extended example** + +```sql +CREATE TABLE test +( + a Nullable(String), + b Nullable(Int64) +) +ENGINE = Memory AS +SELECT * +FROM VALUES(('a', 1), ('b', 2), ('c', 2), (NULL, 3), (NULL, NULL), ('d', NULL)); + +select * from test; +┌─a────┬────b─┐ +│ a │ 1 │ +│ b │ 2 │ +│ c │ 2 │ +│ ᴺᵁᴸᴸ │ 3 │ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ d │ ᴺᵁᴸᴸ │ +└──────┴──────┘ + +SELECT argMax(a, b), max(b) FROM test; +┌─argMax(a, b)─┬─max(b)─┐ +│ b │ 3 │ -- argMax = 'b' because it the first not Null value, max(b) is from another row! +└──────────────┴────────┘ + +SELECT argMax(tuple(a), b) FROM test; +┌─argMax(tuple(a), b)─┐ +│ (NULL) │ -- The a `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value +└─────────────────────┘ + +SELECT (argMax((a, b), b) as t).1 argMaxA, t.2 argMaxB FROM test; +┌─argMaxA─┬─argMaxB─┐ +│ ᴺᵁᴸᴸ │ 3 │ -- you can use Tuple and get both (all - tuple(*)) columns for the according max(b) +└─────────┴─────────┘ + +SELECT argMax(a, b), max(b) FROM test WHERE a IS NULL AND b IS NULL; +┌─argMax(a, b)─┬─max(b)─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -- All aggregated rows contains at least one `NULL` value because of the filter, so all rows are skipped, therefore the result will be `NULL` +└──────────────┴────────┘ + +SELECT argMax(a, (b,a)) FROM test; +┌─argMax(a, tuple(b, a))─┐ +│ c │ -- There are two rows with b=2, `Tuple` in the `Max` allows to get not the first `arg` +└────────────────────────┘ + +SELECT argMax(a, tuple(b)) FROM test; +┌─argMax(a, tuple(b))─┐ +│ b │ -- `Tuple` can be used in `Max` to not skip Nulls in `Max` +└─────────────────────┘ +``` + +**See also** + +- [Tuple](/docs/en/sql-reference/data-types/tuple.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index a7c21e3f15b..fdfce0833e0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -6,6 +6,7 @@ sidebar_position: 105 # argMin Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. 
+Both parts the `arg` and the `min` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. **Syntax** @@ -49,3 +50,65 @@ Result: │ worker │ └──────────────────────┘ ``` + +**Extended example** + +```sql +CREATE TABLE test +( + a Nullable(String), + b Nullable(Int64) +) +ENGINE = Memory AS +SELECT * +FROM VALUES((NULL, 0), ('a', 1), ('b', 2), ('c', 2), (NULL, NULL), ('d', NULL)); + +select * from test; +┌─a────┬────b─┐ +│ ᴺᵁᴸᴸ │ 0 │ +│ a │ 1 │ +│ b │ 2 │ +│ c │ 2 │ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ d │ ᴺᵁᴸᴸ │ +└──────┴──────┘ + +SELECT argMin(a, b), min(b) FROM test; +┌─argMin(a, b)─┬─min(b)─┐ +│ a │ 0 │ -- argMin = a because it the first not `NULL` value, min(b) is from another row! +└──────────────┴────────┘ + +SELECT argMin(tuple(a), b) FROM test; +┌─argMin(tuple(a), b)─┐ +│ (NULL) │ -- The a `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value +└─────────────────────┘ + +SELECT (argMin((a, b), b) as t).1 argMinA, t.2 argMinB from test; +┌─argMinA─┬─argMinB─┐ +│ ᴺᵁᴸᴸ │ 0 │ -- you can use `Tuple` and get both (all - tuple(*)) columns for the according max(b) +└─────────┴─────────┘ + +SELECT argMin(a, b), min(b) FROM test WHERE a IS NULL and b IS NULL; +┌─argMin(a, b)─┬─min(b)─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -- All aggregated rows contains at least one `NULL` value because of the filter, so all rows are skipped, therefore the result will be `NULL` +└──────────────┴────────┘ + +SELECT argMin(a, (b, a)), min(tuple(b, a)) FROM test; +┌─argMin(a, tuple(b, a))─┬─min(tuple(b, a))─┐ +│ d │ (NULL,NULL) │ -- 'd' is the first not `NULL` value for the min +└────────────────────────┴──────────────────┘ + +SELECT argMin((a, b), (b, a)), min(tuple(b, a)) FROM test; +┌─argMin(tuple(a, b), tuple(b, a))─┬─min(tuple(b, a))─┐ +│ (NULL,NULL) │ (NULL,NULL) │ -- argMin returns (NULL,NULL) here because `Tuple` allows to don't skip `NULL` and min(tuple(b, a)) in this case is minimal value for this dataset +└──────────────────────────────────┴──────────────────┘ + +SELECT argMin(a, tuple(b)) FROM test; +┌─argMax(a, tuple(b))─┐ +│ d │ -- `Tuple` can be used in `min` to not skip rows with `NULL` values as b. +└─────────────────────┘ +``` + +**See also** + +- [Tuple](/docs/en/sql-reference/data-types/tuple.md) diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 42d402e9d44..d07a5292431 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -10,7 +10,9 @@ There are at least\* two types of functions - regular functions (they are just c In this section we discuss regular functions. For aggregate functions, see the section “Aggregate functions”. -\* - There is a third type of function that the ‘arrayJoin’ function belongs to; table functions can also be mentioned separately.\* +:::note +There is a third type of function that the [‘arrayJoin’ function](/docs/en/sql-reference/functions/array-join.md) belongs to. And [table functions](/docs/en/sql-reference/table-functions/index.md) can also be mentioned separately. 
+::: ## Strong Typing diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 87f0cbc7b9e..928b35ee421 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -131,15 +131,31 @@ void LocalServer::initialize(Poco::Util::Application & self) }); #endif - IOThreadPool::initialize( + getIOThreadPool().initialize( config().getUInt("max_io_thread_pool_size", 100), config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); - OutdatedPartsLoadingThreadPool::initialize( - config().getUInt("max_outdated_parts_loading_thread_pool_size", 16), + + const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64); + getActivePartsLoadingThreadPool().initialize( + active_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded - config().getUInt("max_outdated_parts_loading_thread_pool_size", 16)); + active_parts_loading_threads); + + const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32); + getOutdatedPartsLoadingThreadPool().initialize( + outdated_parts_loading_threads, + 0, // We don't need any threads one all the parts will be loaded + outdated_parts_loading_threads); + + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + + const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); + getPartsCleaningThreadPool().initialize( + cleanup_threads, + 0, // We don't need any threads one all the parts will be deleted + cleanup_threads); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0f7e743d7bb..eb74f4d80ad 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -684,21 +684,36 @@ try }); #endif - IOThreadPool::initialize( + getIOThreadPool().initialize( server_settings.max_io_thread_pool_size, server_settings.max_io_thread_pool_free_size, server_settings.io_thread_pool_queue_size); - BackupsIOThreadPool::initialize( + getBackupsIOThreadPool().initialize( server_settings.max_backups_io_thread_pool_size, server_settings.max_backups_io_thread_pool_free_size, server_settings.backups_io_thread_pool_queue_size); - OutdatedPartsLoadingThreadPool::initialize( + getActivePartsLoadingThreadPool().initialize( + server_settings.max_active_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_active_parts_loading_thread_pool_size); + + getOutdatedPartsLoadingThreadPool().initialize( server_settings.max_outdated_parts_loading_thread_pool_size, - 0, // We don't need any threads one all the parts will be loaded + 0, // We don't need any threads once all the parts will be loaded server_settings.max_outdated_parts_loading_thread_pool_size); + /// It could grow if we need to synchronously wait until all the data parts will be loaded. + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + + getPartsCleaningThreadPool().initialize( + server_settings.max_parts_cleaning_thread_pool_size, + 0, // We don't need any threads one all the parts will be deleted + server_settings.max_parts_cleaning_thread_pool_size); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { @@ -1233,6 +1248,36 @@ try global_context->getAsyncLoader().setMaxThreads(AsyncLoaderPoolId::BackgroundLoad, bg_pool_size ? 
bg_pool_size : getNumberOfPhysicalCPUCores()); global_context->getAsyncLoader().setMaxThreads(AsyncLoaderPoolId::BackgroundStartup, bg_pool_size ? bg_pool_size : getNumberOfPhysicalCPUCores()); + getIOThreadPool().reloadConfiguration( + server_settings.max_io_thread_pool_size, + server_settings.max_io_thread_pool_free_size, + server_settings.io_thread_pool_queue_size); + + getBackupsIOThreadPool().reloadConfiguration( + server_settings.max_backups_io_thread_pool_size, + server_settings.max_backups_io_thread_pool_free_size, + server_settings.backups_io_thread_pool_queue_size); + + getActivePartsLoadingThreadPool().reloadConfiguration( + server_settings.max_active_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_active_parts_loading_thread_pool_size); + + getOutdatedPartsLoadingThreadPool().reloadConfiguration( + server_settings.max_outdated_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_outdated_parts_loading_thread_pool_size); + + /// It could grow if we need to synchronously wait until all the data parts will be loaded. + getOutdatedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + + getPartsCleaningThreadPool().reloadConfiguration( + server_settings.max_parts_cleaning_thread_pool_size, + 0, // We don't need any threads one all the parts will be deleted + server_settings.max_parts_cleaning_thread_pool_size); + if (config->has("resources")) { global_context->getResourceManager()->updateConfiguration(*config); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index f1fd276e34b..967beba4bf5 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -161,7 +161,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s /* dest_key= */ blob_path[0], request_settings, object_attributes, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupReaderS3"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderS3"), /* for_disk_s3= */ true); return file_size; @@ -212,7 +212,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src fs::path(s3_uri.key) / path_in_backup, request_settings, {}, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); return; /// copied! 
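The thread pool setup above replaces the old per-pool classes (`IOThreadPool`, `BackupsIOThreadPool`, `OutdatedPartsLoadingThreadPool`) with accessor functions returning process-wide pools that are initialized once at startup, resized via `reloadConfiguration()` on config reload, and can temporarily run with a larger "turbo" limit via `setMaxTurboThreads()`. The sketch below is a minimal, self-contained model of that bookkeeping only; all names are placeholders, and where the real `StaticThreadPool` resizes an actual `ThreadPool`, this sketch merely records the effective limit.

```cpp
#include <cstddef>
#include <mutex>
#include <stdexcept>
#include <string>

// Simplified model of the shared-pool bookkeeping. The real StaticThreadPool wraps an
// actual ThreadPool and calls setMaxThreads() on it; here "applying" a limit just stores it.
class SharedPoolSketch
{
public:
    explicit SharedPoolSketch(std::string name_) : name(std::move(name_)) {}

    void initialize(std::size_t max_threads, std::size_t /*max_free_threads*/, std::size_t /*queue_size*/)
    {
        std::lock_guard lock(mutex);
        if (initialized)
            throw std::logic_error("The " + name + " is initialized twice");
        initialized = true;
        max_threads_normal = max_threads;
        max_threads_turbo = max_threads;   // until setMaxTurboThreads(), turbo mode changes nothing
        applyMaxThreads();
    }

    void reloadConfiguration(std::size_t max_threads, std::size_t /*max_free_threads*/, std::size_t /*queue_size*/)
    {
        std::lock_guard lock(mutex);
        max_threads_normal = max_threads;
        applyMaxThreads();                 // keeps the turbo limit if turbo mode is currently on
    }

    void setMaxTurboThreads(std::size_t max_threads_turbo_)
    {
        std::lock_guard lock(mutex);
        max_threads_turbo = max_threads_turbo_;
        applyMaxThreads();
    }

    // Counted, so that enable/disable calls from independent waiters compose.
    void enableTurboMode()  { std::lock_guard lock(mutex); ++turbo_mode_enabled; applyMaxThreads(); }
    void disableTurboMode() { std::lock_guard lock(mutex); --turbo_mode_enabled; applyMaxThreads(); }

    std::size_t currentMaxThreads() const { std::lock_guard lock(mutex); return current_max_threads; }

private:
    void applyMaxThreads() { current_max_threads = turbo_mode_enabled > 0 ? max_threads_turbo : max_threads_normal; }

    const std::string name;
    mutable std::mutex mutex;
    bool initialized = false;
    std::size_t max_threads_normal = 0;
    std::size_t max_threads_turbo = 0;
    std::size_t turbo_mode_enabled = 0;
    std::size_t current_max_threads = 0;
};

// Process-wide accessor in the style of getOutdatedPartsLoadingThreadPool(): a function-local
// static instead of a class-level static member, so initialization order is well defined.
SharedPoolSketch & getOutdatedPartsLoadingPoolSketch()
{
    static SharedPoolSketch instance("OutdatedPartsLoadingThreadPool (sketch)");
    return instance;
}
```

As the header comment later in the patch explains, turbo mode exists so that code which must synchronously wait for outdated parts to finish loading can temporarily raise the pool's thread limit and lower it again afterwards.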
} } @@ -224,7 +224,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, request_settings, {}, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3")); + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); } BackupWriterS3::~BackupWriterS3() = default; @@ -258,7 +258,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) DBMS_DEFAULT_BUFFER_SIZE, request_settings, std::nullopt, - threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupWriterS3"), + threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3"), write_settings); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1de047b634f..b7e4e2b733b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -278,7 +278,7 @@ public: static Int32 cancelled_status() { return exit_after_signals.load(); } }; -/// This signal handler is set only for SIGINT. +/// This signal handler is set for SIGINT and SIGQUIT. void interruptSignalHandler(int signum) { if (QueryInterruptHandler::try_stop()) @@ -317,6 +317,9 @@ void ClientBase::setupSignalHandler() if (sigaction(SIGINT, &new_act, nullptr)) throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + + if (sigaction(SIGQUIT, &new_act, nullptr)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); } diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 89607edc425..b09388ac534 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -135,6 +135,8 @@ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \ M(MergeTreePartsLoaderThreadsActive, "Number of threads in the MergeTree parts loader thread pool running a task.") \ + M(MergeTreeOutdatedPartsLoaderThreads, "Number of threads in the threadpool for loading Outdated data parts.") \ + M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \ M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \ diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d4788f98325..64330537ab7 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -21,7 +21,9 @@ namespace DB M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ - M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \ + M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at 
startup.", 0) \ + M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ + M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6a0833aef60..0037acedede 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -201,6 +201,8 @@ class IColumn; M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \ M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \ M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \ + M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \ + \ M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \ \ M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. 
It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \ diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index ee1327ef094..76be273d7dc 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -51,19 +51,11 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist { assertChar('\'', istr); char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end()); - size_t len = next_pos - istr.position(); - if ((len == 32) && (istr.position()[32] == '\'')) + const size_t len = next_pos - istr.position(); + if ((len == 32 || len == 36) && istr.position()[len] == '\'') { - parseUUIDWithoutSeparator( - reinterpret_cast(istr.position()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); - istr.ignore(33); - fast = true; - } - else if ((len == 36) && (istr.position()[36] == '\'')) - { - parseUUID( - reinterpret_cast(istr.position()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); - istr.ignore(37); + uuid = parseUUID(std::span(reinterpret_cast(istr.position()), len)); + istr.ignore(len + 1); fast = true; } else diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 8d0af9b0abf..3852cca6928 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -129,17 +129,6 @@ struct RegExpTreeDictionary::RegexTreeNode return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0); } - /// check if this node can cover all the attributes from the query. - bool containsAll(const std::unordered_map & matching_attributes) const - { - for (const auto & [key, value] : matching_attributes) - { - if (!attributes.contains(key)) - return false; - } - return true; - } - struct AttributeValue { Field field; @@ -691,9 +680,6 @@ std::unordered_map RegExpTreeDictionary::match( if (node_ptr->match(reinterpret_cast(keys_data.data()) + offset, length)) { match_result.insertNodeID(node_ptr->id); - /// When this node is leaf and contains all the required attributes, it means a match. - if (node_ptr->containsAll(attributes) && node_ptr->children.empty()) - break; } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 7c497baa450..6bf72434580 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -945,18 +945,23 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size); ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed); -#ifdef ABORT_ON_LOGICAL_ERROR const size_t new_file_offset = file_offset_of_buffer_end + size; - chassert(new_file_offset - 1 <= file_segment.range().right); const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true); + if (new_file_offset > file_segment.range().right + 1) + { + auto file_segment_path = file_segment.getPathInLocalCache(); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Read unexpected size. 
File size: {}, file path: {}, file segment info: {}", + fs::file_size(file_segment_path), file_segment_path, file_segment.getInfoForLog()); + } if (new_file_offset > file_segment_write_offset) { - LOG_TRACE( - log, "Read {} bytes, file offset: {}, segment: {}, segment write offset: {}", + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Read unexpected size. Read {} bytes, file offset: {}, segment: {}, segment write offset: {}", size, file_offset_of_buffer_end, file_segment.range().toString(), file_segment_write_offset); - chassert(false); } -#endif } else { @@ -1219,7 +1224,7 @@ off_t CachedOnDiskReadBufferFromFile::getPosition() void CachedOnDiskReadBufferFromFile::assertCorrectness() const { - if (!CachedObjectStorage::canUseReadThroughCache() + if (!CachedObjectStorage::canUseReadThroughCache(settings) && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed (query_id: {})", query_id); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 12fbbbcf747..04030fe5f8f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -36,7 +36,7 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( with_cache = settings.remote_fs_cache && settings.enable_filesystem_cache - && (!query_id.empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache); + && (!query_id.empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache || !settings.avoid_readthrough_cache_outside_query_context); } SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 1d24d9d5411..3e73e45638b 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -57,7 +57,7 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin ReadSettings modified_settings{read_settings}; modified_settings.remote_fs_cache = cache; - if (!canUseReadThroughCache()) + if (!canUseReadThroughCache(read_settings)) modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; return object_storage->patchSettings(modified_settings); @@ -227,8 +227,11 @@ String CachedObjectStorage::getObjectsNamespace() const return object_storage->getObjectsNamespace(); } -bool CachedObjectStorage::canUseReadThroughCache() +bool CachedObjectStorage::canUseReadThroughCache(const ReadSettings & settings) { + if (!settings.avoid_readthrough_cache_outside_query_context) + return true; + return CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() && !CurrentThread::getQueryId().empty(); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index b5186d39c32..ba9fbd02d94 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -112,7 +112,9 @@ public: WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override; - static bool canUseReadThroughCache(); + const FileCacheSettings & getCacheSettings() const { return cache_settings; } + + static bool canUseReadThroughCache(const ReadSettings & settings); private: 
FileCache::Key getCacheKey(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 129f1ab1ef7..005d115a277 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -596,7 +596,8 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W { LOG_TEST(log, "Write file: {}", path); auto transaction = createObjectStorageTransaction(); - return transaction->writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function)); + transaction->writeFileUsingBlobWritingFunction(path, mode, std::move(write_blob_function)); + transaction->commit(); } void DiskObjectStorage::applyNewSettings( diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 257a6fdf2ea..bd66ada492f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -710,8 +710,6 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( metadata_transaction->createMetadataFile(path, blob_name, object_size); else metadata_transaction->addBlobToMetadata(path, blob_name, object_size); - - metadata_transaction->commit(); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6f2974c49c6..39b28e025a6 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -364,7 +364,7 @@ std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( settings.max_download_buffer_size); res = wrapInParallelReadBufferIfSupported( - buf, threadPoolCallbackRunner(IOThreadPool::get(), "ParallelRead"), + buf, threadPoolCallbackRunner(getIOThreadPool().get(), "ParallelRead"), max_download_threads, settings.max_download_buffer_size, file_size); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8dc05e75855..9896468e616 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_QUOTED_STRING; extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_UUID; extern const int INCORRECT_DATA; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int LOGICAL_ERROR; @@ -46,48 +47,45 @@ inline void parseHex(IteratorSrc src, IteratorDst dst) dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); } -void parseUUID(const UInt8 * src36, UInt8 * dst16) +UUID parseUUID(std::span src) { - /// If string is not like UUID - implementation specific behaviour. 
+ UUID uuid; + const auto * src_ptr = src.data(); + auto * dst = reinterpret_cast(&uuid); + const auto size = src.size(); - parseHex<4>(&src36[0], &dst16[0]); - parseHex<2>(&src36[9], &dst16[4]); - parseHex<2>(&src36[14], &dst16[6]); - parseHex<2>(&src36[19], &dst16[8]); - parseHex<6>(&src36[24], &dst16[10]); -} +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const std::reverse_iterator dst_it(dst + sizeof(UUID)); +#endif + if (size == 36) + { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + parseHex<4>(src_ptr, dst_it + 8); + parseHex<2>(src_ptr + 9, dst_it + 12); + parseHex<2>(src_ptr + 14, dst_it + 14); + parseHex<2>(src_ptr + 19, dst_it); + parseHex<6>(src_ptr + 24, dst_it + 2); +#else + parseHex<4>(src_ptr, dst); + parseHex<2>(src_ptr + 9, dst + 4); + parseHex<2>(src_ptr + 14, dst + 6); + parseHex<2>(src_ptr + 19, dst + 8); + parseHex<6>(src_ptr + 24, dst + 10); +#endif + } + else if (size == 32) + { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + parseHex<8>(src_ptr, dst_it + 8); + parseHex<8>(src_ptr + 16, dst_it); +#else + parseHex<16>(src_ptr, dst); +#endif + } + else + throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size); -void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - parseHex<16>(&src36[0], &dst16[0]); -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - /// FIXME This code looks like trash. - parseHex<4>(&src36[0], dst16 + 8); - parseHex<2>(&src36[9], dst16 + 12); - parseHex<2>(&src36[14], dst16 + 14); - parseHex<2>(&src36[19], dst16); - parseHex<6>(&src36[24], dst16 + 2); -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. 
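The unified `parseUUID` above accepts both the 36-character form with dashes and the 32-character form without them, and maps the parsed bytes onto the UUID's in-memory representation (hence the reverse iterator on little-endian builds). As a rough illustration only — the names here are made up, and the endianness mapping onto ClickHouse's two-UInt64 `UUID` layout is deliberately left out — parsing both textual forms into 16 bytes in RFC text order can look like this:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string_view>

// Hypothetical helper (not the real parseHex): value of a single hex digit.
static std::uint8_t hexDigit(char c)
{
    if (c >= '0' && c <= '9') return static_cast<std::uint8_t>(c - '0');
    if (c >= 'a' && c <= 'f') return static_cast<std::uint8_t>(c - 'a' + 10);
    if (c >= 'A' && c <= 'F') return static_cast<std::uint8_t>(c - 'A' + 10);
    throw std::invalid_argument("not a hex digit");
}

// Parse a UUID given either as 36 characters ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
// or as 32 characters (same digits, no separators) into 16 bytes in RFC text order.
std::array<std::uint8_t, 16> parseUuidText(std::string_view src)
{
    if (src.size() != 36 && src.size() != 32)
        throw std::invalid_argument("unexpected length when trying to parse UUID");

    std::array<std::uint8_t, 16> bytes{};
    std::size_t out = 0;
    for (std::size_t i = 0; i < src.size();)
    {
        if (src.size() == 36 && (i == 8 || i == 13 || i == 18 || i == 23))
        {
            if (src[i] != '-')
                throw std::invalid_argument("expected '-' separator");
            ++i;
            continue;
        }
        bytes[out++] = static_cast<std::uint8_t>((hexDigit(src[i]) << 4) | hexDigit(src[i + 1]));
        i += 2;
    }
    return bytes;
}
```

The `SerializationUUID::deserializeTextQuoted` change earlier in the patch relies on exactly this length distinction: it only takes the fast path when the quoted token is 32 or 36 characters long and is followed by the closing quote.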
- - parseHex<8>(&src36[0], dst16 + 8); - parseHex<8>(&src36[16], dst16); + return uuid; } void NO_INLINE throwAtAssertionFailed(const char * s, ReadBuffer & buf) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 32338552b66..804dab16db9 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -623,12 +624,6 @@ struct NullOutput void push_back(char) {} /// NOLINT }; -void parseUUID(const UInt8 * src36, UInt8 * dst16); -void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16); -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); -void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16); - - template ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); @@ -770,6 +765,8 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } +UUID parseUUID(std::span src); + template inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) { @@ -797,12 +794,9 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) return ReturnType(false); } } - - parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } - else - parseUUIDWithoutSeparator(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + uuid = parseUUID({reinterpret_cast(s), size}); return ReturnType(true); } else diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e43ecd7f275..dae4261e92c 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -99,6 +99,8 @@ struct ReadSettings bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; bool is_file_cache_persistent = false; /// Some files can be made non-evictable. + /// Don't populate cache when the read is not part of query execution (e.g. background thread). 
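The new `avoid_readthrough_cache_outside_query_context` flag guards cache population: with it enabled (the default), read-through caching is only allowed when the reading thread is attached to a query. Below is a tiny sketch of that check with simplified stand-ins for `ReadSettings` and the current-thread query id; the names are not the real ones.

```cpp
#include <optional>
#include <string>

// Simplified stand-in for ReadSettings: only the flag relevant to this check.
struct ReadSettingsSketch
{
    bool avoid_readthrough_cache_outside_query_context = true;
};

// Mirrors the intent of CachedObjectStorage::canUseReadThroughCache(settings): populating
// the cache on read is allowed either when the guard flag is disabled, or when the calling
// thread is attached to a query (i.e. has a non-empty query id).
bool canUseReadThroughCacheSketch(const ReadSettingsSketch & settings,
                                  const std::optional<std::string> & current_query_id)
{
    if (!settings.avoid_readthrough_cache_outside_query_context)
        return true; // background reads may also populate the cache

    return current_query_id.has_value() && !current_query_id->empty();
}
```

When this check fails, `patchSettings()` above falls back to "use the cache only if the data is already there" by setting `read_from_filesystem_cache_if_exists_otherwise_bypass_cache`.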
+ bool avoid_readthrough_cache_outside_query_context = true; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; diff --git a/src/IO/SharedThreadPools.cpp b/src/IO/SharedThreadPools.cpp index b7b6aea1567..6a0e953f0ef 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -9,8 +9,12 @@ namespace CurrentMetrics extern const Metric IOThreadsActive; extern const Metric BackupsIOThreads; extern const Metric BackupsIOThreadsActive; - extern const Metric OutdatedPartsLoadingThreads; - extern const Metric OutdatedPartsLoadingThreadsActive; + extern const Metric MergeTreePartsLoaderThreads; + extern const Metric MergeTreePartsLoaderThreadsActive; + extern const Metric MergeTreePartsCleanerThreads; + extern const Metric MergeTreePartsCleanerThreadsActive; + extern const Metric MergeTreeOutdatedPartsLoaderThreads; + extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive; } namespace DB @@ -21,88 +25,117 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::unique_ptr IOThreadPool::instance; -void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +StaticThreadPool::StaticThreadPool( + const String & name_, + CurrentMetrics::Metric threads_metric_, + CurrentMetrics::Metric threads_active_metric_) + : name(name_) + , threads_metric(threads_metric_) + , threads_active_metric(threads_active_metric_) +{ +} + +void StaticThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) { if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is initialized twice", name); + /// By default enabling "turbo mode" won't affect the number of threads anyhow + max_threads_turbo = max_threads; + max_threads_normal = max_threads; instance = std::make_unique( - CurrentMetrics::IOThreads, - CurrentMetrics::IOThreadsActive, + threads_metric, + threads_active_metric, max_threads, max_free_threads, queue_size, /* shutdown_on_exception= */ false); } -ThreadPool & IOThreadPool::get() +void StaticThreadPool::reloadConfiguration(size_t max_threads, size_t max_free_threads, size_t queue_size) { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); + + instance->setMaxThreads(turbo_mode_enabled > 0 ? 
max_threads_turbo : max_threads); + instance->setMaxFreeThreads(max_free_threads); + instance->setQueueSize(queue_size); +} + + +ThreadPool & StaticThreadPool::get() +{ + if (!instance) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); return *instance; } -std::unique_ptr BackupsIOThreadPool::instance; - -void BackupsIOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) -{ - if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is initialized twice"); - } - - instance = std::make_unique( - CurrentMetrics::BackupsIOThreads, - CurrentMetrics::BackupsIOThreadsActive, - max_threads, - max_free_threads, - queue_size, - /* shutdown_on_exception= */ false); -} - -ThreadPool & BackupsIOThreadPool::get() +void StaticThreadPool::enableTurboMode() { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The BackupsIO thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); - return *instance; + std::lock_guard lock(mutex); + + ++turbo_mode_enabled; + if (turbo_mode_enabled == 1) + instance->setMaxThreads(max_threads_turbo); } -std::unique_ptr OutdatedPartsLoadingThreadPool::instance; - -void OutdatedPartsLoadingThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) -{ - if (instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The PartsLoadingThreadPool thread pool is initialized twice"); - } - - instance = std::make_unique( - CurrentMetrics::OutdatedPartsLoadingThreads, - CurrentMetrics::OutdatedPartsLoadingThreadsActive, - max_threads, - max_free_threads, - queue_size, - /* shutdown_on_exception= */ false); -} - -ThreadPool & OutdatedPartsLoadingThreadPool::get() +void StaticThreadPool::disableTurboMode() { if (!instance) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The PartsLoadingThreadPool thread pool is not initialized"); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); - return *instance; + std::lock_guard lock(mutex); + + --turbo_mode_enabled; + if (turbo_mode_enabled == 0) + instance->setMaxThreads(max_threads_normal); +} + +void StaticThreadPool::setMaxTurboThreads(size_t max_threads_turbo_) +{ + if (!instance) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The {} is not initialized", name); + + std::lock_guard lock(mutex); + + max_threads_turbo = max_threads_turbo_; + if (turbo_mode_enabled > 0) + instance->setMaxThreads(max_threads_turbo); +} + +StaticThreadPool & getIOThreadPool() +{ + static StaticThreadPool instance("IOThreadPool", CurrentMetrics::IOThreads, CurrentMetrics::IOThreadsActive); + return instance; +} + +StaticThreadPool & getBackupsIOThreadPool() +{ + static StaticThreadPool instance("BackupsIOThreadPool", CurrentMetrics::BackupsIOThreads, CurrentMetrics::BackupsIOThreadsActive); + return instance; +} + +StaticThreadPool & getActivePartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreePartsLoaderThreadPool", CurrentMetrics::MergeTreePartsLoaderThreads, CurrentMetrics::MergeTreePartsLoaderThreadsActive); + return instance; +} + +StaticThreadPool & getPartsCleaningThreadPool() +{ + static StaticThreadPool instance("MergeTreePartsCleanerThreadPool", CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive); + return instance; +} + +StaticThreadPool & getOutdatedPartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreeOutdatedPartsLoaderThreadPool", 
CurrentMetrics::MergeTreeOutdatedPartsLoaderThreads, CurrentMetrics::MergeTreeOutdatedPartsLoaderThreadsActive); + return instance; } } diff --git a/src/IO/SharedThreadPools.h b/src/IO/SharedThreadPools.h index 1b43dfe778c..188a2a4f003 100644 --- a/src/IO/SharedThreadPools.h +++ b/src/IO/SharedThreadPools.h @@ -1,48 +1,64 @@ #pragma once +#include #include +#include + #include #include +#include namespace DB { -/* - * ThreadPool used for the IO. - */ -class IOThreadPool +class StaticThreadPool { - static std::unique_ptr instance; - public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); + StaticThreadPool( + const String & name_, + CurrentMetrics::Metric threads_metric_, + CurrentMetrics::Metric threads_active_metric_); + + ThreadPool & get(); + + void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); + void reloadConfiguration(size_t max_threads, size_t max_free_threads, size_t queue_size); + + /// At runtime we can increase the number of threads up the specified limit + /// This is needed to utilize as much a possible resources to accomplish some task. + void setMaxTurboThreads(size_t max_threads_turbo_); + void enableTurboMode(); + void disableTurboMode(); + +private: + const String name; + const CurrentMetrics::Metric threads_metric; + const CurrentMetrics::Metric threads_active_metric; + + std::unique_ptr instance; + std::mutex mutex; + size_t max_threads_turbo = 0; + size_t max_threads_normal = 0; + /// If this counter is > 0 - this specific mode is enabled + size_t turbo_mode_enabled = 0; }; +/// ThreadPool used for the IO. +StaticThreadPool & getIOThreadPool(); -/* - * ThreadPool used for the Backup IO. - */ -class BackupsIOThreadPool -{ - static std::unique_ptr instance; +/// ThreadPool used for the Backup IO. +StaticThreadPool & getBackupsIOThreadPool(); -public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); -}; +/// ThreadPool used for the loading of Outdated data parts for MergeTree tables. +StaticThreadPool & getActivePartsLoadingThreadPool(); +/// ThreadPool used for deleting data parts for MergeTree tables. +StaticThreadPool & getPartsCleaningThreadPool(); -/* - * ThreadPool used for the loading of Outdated data parts for MergeTree tables. - */ -class OutdatedPartsLoadingThreadPool -{ - static std::unique_ptr instance; - -public: - static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); - static ThreadPool & get(); -}; +/// This ThreadPool is used for the loading of Outdated data parts for MergeTree tables. 
+/// Normally we will just load Outdated data parts concurrently in background, but in +/// case when we need to synchronously wait for the loading to be finished, we can increase +/// the number of threads by calling enableTurboMode() :-) +StaticThreadPool & getOutdatedPartsLoadingThreadPool(); } diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index a0eceddc6f6..4f1a95181d4 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -20,20 +20,35 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) } } -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -void formatUUID(std::reverse_iterator src16, UInt8 * dst36) +std::array formatUUID(const UUID & uuid) { - formatHex(src16 + 8, &dst36[0], 4); - dst36[8] = '-'; - formatHex(src16 + 12, &dst36[9], 2); - dst36[13] = '-'; - formatHex(src16 + 14, &dst36[14], 2); - dst36[18] = '-'; - formatHex(src16, &dst36[19], 2); - dst36[23] = '-'; - formatHex(src16 + 2, &dst36[24], 6); + std::array dst; + const auto * src_ptr = reinterpret_cast(&uuid); + auto * dst_ptr = dst.data(); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const std::reverse_iterator src_it(src_ptr + 16); + formatHex(src_it + 8, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_it + 12, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_it + 14, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_it, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_it + 2, dst_ptr + 24, 6); +#else + formatHex(src_ptr, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_ptr + 4, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_ptr + 6, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_ptr + 8, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_ptr + 10, dst_ptr + 24, 6); +#endif + + return dst; } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index cdbc952690c..056c2ca1b50 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -625,13 +625,15 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } -void formatUUID(std::reverse_iterator src16, UInt8 * dst36); +/// @brief Serialize `uuid` into an array of characters in big-endian byte order. +/// @param uuid UUID to serialize. +/// @return Array of characters in big-endian byte order. 
+std::array formatUUID(const UUID & uuid); inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) { - char s[36]; - formatUUID(std::reverse_iterator(reinterpret_cast(&uuid) + 16), reinterpret_cast(s)); - buf.write(s, sizeof(s)); + const auto serialized_uuid = formatUUID(uuid); + buf.write(serialized_uuid.data(), serialized_uuid.size()); } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 79a9765108f..50e4f6e7580 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -74,12 +74,12 @@ const String & FileCache::getBasePath() const String FileCache::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const { - return metadata.getPathInLocalCache(key, offset, segment_kind); + return metadata.getPathForFileSegment(key, offset, segment_kind); } String FileCache::getPathInLocalCache(const Key & key) const { - return metadata.getPathInLocalCache(key); + return metadata.getPathForKey(key); } void FileCache::assertInitialized() const @@ -149,7 +149,7 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: auto add_to_result = [&](const FileSegmentMetadata & file_segment_metadata) { FileSegmentPtr file_segment; - if (file_segment_metadata.valid()) + if (!file_segment_metadata.evicting()) { file_segment = file_segment_metadata.file_segment; if (file_segment->isDownloaded()) @@ -650,7 +650,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) } ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize(false)); locked_key.removeFileSegment(segment->offset(), segment->lock()); return PriorityIterationResult::REMOVE_AND_CONTINUE; @@ -1056,7 +1056,7 @@ std::vector FileCache::tryGetCachePaths(const Key & key) for (const auto & [offset, file_segment_metadata] : *locked_key->getKeyMetadata()) { if (file_segment_metadata->file_segment->state() == FileSegment::State::DOWNLOADED) - cache_paths.push_back(metadata.getPathInLocalCache(key, offset, file_segment_metadata->file_segment->getKind())); + cache_paths.push_back(metadata.getPathForFileSegment(key, offset, file_segment_metadata->file_segment->getKind())); } return cache_paths; } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 9370b64b2d4..7b82c58080c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -314,6 +314,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset) if (!size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); + const auto file_segment_path = getPathInLocalCache(); + { auto lock = segment_guard.lock(); @@ -352,7 +354,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) "Cache writer was finalized (downloaded size: {}, state: {})", current_downloaded_size, stateToString(download_state)); - cache_writer = std::make_unique(getPathInLocalCache()); + cache_writer = std::make_unique(file_segment_path); } } @@ -366,7 +368,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) downloaded_size += size; - chassert(std::filesystem::file_size(getPathInLocalCache()) == downloaded_size); + 
chassert(std::filesystem::file_size(file_segment_path) == downloaded_size); } catch (ErrnoException & e) { @@ -376,9 +378,10 @@ void FileSegment::write(const char * from, size_t size, size_t offset) int code = e.getErrno(); if (code == /* No space left on device */28 || code == /* Quota exceeded */122) { - const auto file_size = fs::file_size(getPathInLocalCache()); + const auto file_size = fs::file_size(file_segment_path); chassert(downloaded_size <= file_size); chassert(reserved_size >= file_size); + chassert(file_size <= range().size()); if (downloaded_size != file_size) downloaded_size = file_size; } @@ -523,8 +526,8 @@ void FileSegment::setDownloadedUnlocked(const FileSegmentGuard::Lock &) remote_file_reader.reset(); } - chassert(getDownloadedSize(false) > 0); - chassert(fs::file_size(getPathInLocalCache()) > 0); + chassert(downloaded_size > 0); + chassert(fs::file_size(getPathInLocalCache()) == downloaded_size); } void FileSegment::setDownloadFailedUnlocked(const FileSegmentGuard::Lock & lock) @@ -848,7 +851,8 @@ void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &) if (download_state == State::DETACHED) return; - resetDownloaderUnlocked(lock); + if (!downloader_id.empty()) + resetDownloaderUnlocked(lock); setDetachedState(lock); } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 163a15fcfda..75395a671f4 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -85,7 +85,7 @@ public: EMPTY, /** * A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader - * because each cache user might acquire multiple file segments and reads them one by one, + * because each cache user might acquire multiple file segments and read them one by one, * so only user which actually needs to read this segment earlier than others - becomes a downloader. */ DOWNLOADING, diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index ad63dcc7ea5..93343398783 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -85,6 +85,7 @@ public: virtual void removeAll(const CacheGuard::Lock &) = 0; + /// From lowest to highest priority. 
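The comment above documents the `iterate()` contract: entries are visited from lowest to highest priority, which is what lets `FileCache::tryReserve()` collect eviction candidates and drop them as it goes by returning `REMOVE_AND_CONTINUE`. The following is a simplified, self-contained model of that traversal over a plain LRU list; all names here are placeholders, not the real interface.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <list>

// Simplified stand-in for the cache priority queue: a plain LRU list of (key, size),
// where the front holds the lowest-priority (least recently used) entry.
struct EntrySketch
{
    std::uint64_t key = 0;
    std::size_t size = 0;
};

enum class IterationResult { CONTINUE, REMOVE_AND_CONTINUE, STOP };

// Visit entries from lowest to highest priority and let the callback evict as it goes.
void iterateLowestFirst(std::list<EntrySketch> & lru,
                        const std::function<IterationResult(const EntrySketch &)> & func)
{
    for (auto it = lru.begin(); it != lru.end();)
    {
        switch (func(*it))
        {
            case IterationResult::CONTINUE:            ++it; break;
            case IterationResult::REMOVE_AND_CONTINUE: it = lru.erase(it); break;
            case IterationResult::STOP:                return;
        }
    }
}
```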
virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0; private: diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 843ffd45b63..740d3be72b8 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -145,15 +145,12 @@ String CacheMetadata::getFileNameForFileSegment(size_t offset, FileSegmentKind s return std::to_string(offset) + file_suffix; } -String CacheMetadata::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const +String CacheMetadata::getPathForFileSegment(const Key & key, size_t offset, FileSegmentKind segment_kind) const { - String file_suffix; - - const auto key_str = key.toString(); - return fs::path(path) / key_str.substr(0, 3) / key_str / getFileNameForFileSegment(offset, segment_kind); + return fs::path(getPathForKey(key)) / getFileNameForFileSegment(offset, segment_kind); } -String CacheMetadata::getPathInLocalCache(const Key & key) const +String CacheMetadata::getPathForKey(const Key & key) const { const auto key_str = key.toString(); return fs::path(path) / key_str.substr(0, 3) / key_str; @@ -178,7 +175,7 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( it = emplace( key, std::make_shared( - key, getPathInLocalCache(key), *cleanup_queue, is_initial_load)).first; + key, getPathForKey(key), *cleanup_queue, is_initial_load)).first; } key_metadata = it->second; @@ -260,7 +257,7 @@ void CacheMetadata::doCleanup() erase(it); LOG_DEBUG(log, "Key {} is removed from metadata", cleanup_key); - const fs::path key_directory = getPathInLocalCache(cleanup_key); + const fs::path key_directory = getPathForKey(cleanup_key); const fs::path key_prefix_directory = key_directory.parent_path(); try @@ -346,6 +343,16 @@ void LockedKey::removeAllReleasable() ++it; continue; } + else if (it->second->evicting()) + { + /// File segment is currently a removal candidate, + /// we do not know if it will be removed or not yet, + /// but its size is currently accounted as potentially removed, + /// so if we remove file segment now, we break the freeable_count + /// calculation in tryReserve. 
+ ++it; + continue; + } auto file_segment = it->second->file_segment; it = removeFileSegment(file_segment->offset(), file_segment->lock()); @@ -370,8 +377,14 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm file_segment->queue_iterator->annul(); const auto path = key_metadata->getFileSegmentPath(*file_segment); - if (fs::exists(path)) + bool exists = fs::exists(path); + if (exists) + { fs::remove(path); + LOG_TEST(log, "Removed file segment at path: {}", path); + } + else if (file_segment->downloaded_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); file_segment->detach(segment_lock, *this); return key_metadata->erase(it); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 2e015b07ed0..3fd6176f201 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -22,7 +22,7 @@ struct FileSegmentMetadata : private boost::noncopyable size_t size() const; - bool valid() const { return !removal_candidate.load(); } + bool evicting() const { return removal_candidate.load(); } Priority::Iterator getQueueIterator() const { return file_segment->getQueueIterator(); } @@ -85,12 +85,12 @@ public: const String & getBaseDirectory() const { return path; } - String getPathInLocalCache( + String getPathForFileSegment( const Key & key, size_t offset, FileSegmentKind segment_kind) const; - String getPathInLocalCache(const Key & key) const; + String getPathForKey(const Key & key) const; static String getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind); void iterate(IterateCacheMetadataFunc && func); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d2be48dafb3..e84a400a220 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -833,6 +833,19 @@ InterpreterSelectQuery::InterpreterSelectQuery( need_analyze_again = true; } + if (can_analyze_again + && settings.max_parallel_replicas > 1 + && settings.allow_experimental_parallel_reading_from_replicas > 0 + && settings.parallel_replicas_custom_key.value.empty() + && getTrivialCount(0).has_value()) + { + /// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("max_parallel_replicas", UInt64{0}); + need_analyze_again = true; + LOG_TRACE(log, "Disabling parallel replicas to be able to use a trivial count optimization"); + } + if (need_analyze_again) { size_t current_query_analyze_count = context->getQueryContext()->kitchen_sink.analyze_counter.load(); @@ -2254,79 +2267,84 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } } -void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan) +/// Based on the query analysis, check if optimizing the count trivial count to use totalRows is possible +std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_parallel_replicas) { - auto & query = getSelectQuery(); const Settings & settings = context->getSettingsRef(); - - /// Optimization for trivial query like SELECT count() FROM table. 
bool optimize_trivial_count = syntax_analyzer_result->optimize_trivial_count - && (settings.max_parallel_replicas <= 1) + && (max_parallel_replicas <= 1) && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage && storage->getName() != "MaterializedMySQL" && !storage->hasLightweightDeletedMask() && query_info.filter_asts.empty() - && processing_stage == QueryProcessingStage::FetchColumns && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) && typeid_cast(query_analyzer->aggregates()[0].function.get()); - if (optimize_trivial_count) + if (!optimize_trivial_count) + return {}; + + auto & query = getSelectQuery(); + if (!query.prewhere() && !query.where() && !context->getCurrentTransaction()) + { + return storage->totalRows(settings); + } + else + { + // It's possible to optimize count() given only partition predicates + SelectQueryInfo temp_query_info; + temp_query_info.query = query_ptr; + temp_query_info.syntax_analyzer_result = syntax_analyzer_result; + temp_query_info.prepared_sets = query_analyzer->getPreparedSets(); + + return storage->totalRowsByPartitionPredicate(temp_query_info, context); + } +} + +void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan) +{ + auto & query = getSelectQuery(); + const Settings & settings = context->getSettingsRef(); + std::optional num_rows; + + /// Optimization for trivial query like SELECT count() FROM table. + if (processing_stage == QueryProcessingStage::FetchColumns && (num_rows = getTrivialCount(settings.max_parallel_replicas))) { const auto & desc = query_analyzer->aggregates()[0]; const auto & func = desc.function; - std::optional num_rows{}; + const AggregateFunctionCount & agg_count = static_cast(*func); - if (!query.prewhere() && !query.where() && !context->getCurrentTransaction()) - { - num_rows = storage->totalRows(settings); - } - else // It's possible to optimize count() given only partition predicates - { - SelectQueryInfo temp_query_info; - temp_query_info.query = query_ptr; - temp_query_info.syntax_analyzer_result = syntax_analyzer_result; - temp_query_info.prepared_sets = query_analyzer->getPreparedSets(); + /// We will process it up to "WithMergeableState". + std::vector state(agg_count.sizeOfData()); + AggregateDataPtr place = state.data(); - num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, context); - } + agg_count.create(place); + SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - if (num_rows) - { - const AggregateFunctionCount & agg_count = static_cast(*func); + agg_count.set(place, *num_rows); - /// We will process it up to "WithMergeableState". 
- std::vector state(agg_count.sizeOfData()); - AggregateDataPtr place = state.data(); + auto column = ColumnAggregateFunction::create(func); + column->insertFrom(place); - agg_count.create(place); - SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); + Block header = analysis_result.before_aggregation->getResultColumns(); + size_t arguments_size = desc.argument_names.size(); + DataTypes argument_types(arguments_size); + for (size_t j = 0; j < arguments_size; ++j) + argument_types[j] = header.getByName(desc.argument_names[j]).type; - agg_count.set(place, *num_rows); + Block block_with_count{ + {std::move(column), std::make_shared(func, argument_types, desc.parameters), desc.column_name}}; - auto column = ColumnAggregateFunction::create(func); - column->insertFrom(place); - - Block header = analysis_result.before_aggregation->getResultColumns(); - size_t arguments_size = desc.argument_names.size(); - DataTypes argument_types(arguments_size); - for (size_t j = 0; j < arguments_size; ++j) - argument_types[j] = header.getByName(desc.argument_names[j]).type; - - Block block_with_count{ - {std::move(column), std::make_shared(func, argument_types, desc.parameters), desc.column_name}}; - - auto source = std::make_shared(block_with_count); - auto prepared_count = std::make_unique(Pipe(std::move(source))); - prepared_count->setStepDescription("Optimized trivial count"); - query_plan.addStep(std::move(prepared_count)); - from_stage = QueryProcessingStage::WithMergeableState; - analysis_result.first_stage = false; - return; - } + auto source = std::make_shared(block_with_count); + auto prepared_count = std::make_unique(Pipe(std::move(source))); + prepared_count->setStepDescription("Optimized trivial count"); + query_plan.addStep(std::move(prepared_count)); + from_stage = QueryProcessingStage::WithMergeableState; + analysis_result.first_stage = false; + return; } /// Limitation on the number of columns to read. 
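Editor's note: the refactor above extracts the trivial-count decision into a helper that answers `SELECT count()` from table metadata (totalRows) when nothing can filter rows out. A simplified standalone sketch of that decision under assumed types; it is not the InterpreterSelectQuery code itself:

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Assumed, simplified query description (stand-in for the analyzer result).
struct QueryInfo
{
    bool is_single_count_aggregate = false;   // exactly one aggregate and it is count()
    bool has_where = false;
    bool has_prewhere = false;
    uint64_t max_parallel_replicas = 1;
    std::optional<uint64_t> metadata_total_rows;  // what the storage reports as total rows
};

// Mirrors the idea of getTrivialCount(): answer from metadata only when no predicate
// can remove rows and parallel replicas are not involved.
std::optional<uint64_t> getTrivialCount(const QueryInfo & q)
{
    if (!q.is_single_count_aggregate || q.has_where || q.has_prewhere || q.max_parallel_replicas > 1)
        return std::nullopt;
    return q.metadata_total_rows;
}

int main()
{
    QueryInfo q;
    q.is_single_count_aggregate = true;
    q.metadata_total_rows = 123456;
    if (auto rows = getTrivialCount(q))
        std::cout << "count() answered from metadata: " << *rows << "\n";
    else
        std::cout << "full read required\n";
}
```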
diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index e39dd675136..0739e818cd6 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -187,6 +188,7 @@ private: void executeExtremes(QueryPlan & query_plan); void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan); bool autoFinalOnQuery(ASTSelectQuery & select_query); + std::optional getTrivialCount(UInt64 max_parallel_replicas); enum class Modificator { diff --git a/src/Interpreters/OptimizeDateFilterVisitor.cpp b/src/Interpreters/OptimizeDateFilterVisitor.cpp deleted file mode 100644 index aec2dec19c8..00000000000 --- a/src/Interpreters/OptimizeDateFilterVisitor.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 compare_to) -{ - const DateLUTImpl & date_lut = DateLUT::instance(); - - String start_date; - String end_date; - - if (converter == "toYear") - { - UInt64 year = compare_to; - start_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1)); - end_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31)); - } - else if (converter == "toYYYYMM") - { - UInt64 year = compare_to / 100; - UInt64 month = compare_to % 100; - - if (month == 0 || month > 12) return {}; - - static constexpr UInt8 days_of_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - - bool leap_year = (year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)); - - start_date = date_lut.dateToString(date_lut.makeDayNum(year, month, 1)); - end_date = date_lut.dateToString(date_lut.makeDayNum(year, month, days_of_month[month - 1] + (leap_year && month == 2))); - } - else - { - return {}; - } - - if (comparator == "equals") - { - return makeASTFunction("and", - makeASTFunction("greaterOrEquals", - std::make_shared(column), - std::make_shared(start_date) - ), - makeASTFunction("lessOrEquals", - std::make_shared(column), - std::make_shared(end_date) - ) - ); - } - else if (comparator == "notEquals") - { - return makeASTFunction("or", - makeASTFunction("less", - std::make_shared(column), - std::make_shared(start_date) - ), - makeASTFunction("greater", - std::make_shared(column), - std::make_shared(end_date) - ) - ); - } - else if (comparator == "less" || comparator == "greaterOrEquals") - { - return makeASTFunction(comparator, - std::make_shared(column), - std::make_shared(start_date) - ); - } - else - { - return makeASTFunction(comparator, - std::make_shared(column), - std::make_shared(end_date) - ); - } -} - -bool rewritePredicateInPlace(ASTFunction & function, ASTPtr & ast) -{ - const static std::unordered_map swap_relations = { - {"equals", "equals"}, - {"notEquals", "notEquals"}, - {"less", "greater"}, - {"greater", "less"}, - {"lessOrEquals", "greaterOrEquals"}, - {"greaterOrEquals", "lessOrEquals"}, - }; - - if (!swap_relations.contains(function.name)) return false; - - if (!function.arguments || function.arguments->children.size() != 2) return false; - - size_t func_id = function.arguments->children.size(); - - for (size_t i = 0; i < function.arguments->children.size(); i++) - { - if (const auto * func = function.arguments->children[i]->as(); func) - { - if (func->name == "toYear" || func->name == "toYYYYMM") - { - func_id = i; - } - } - } - - if (func_id == 
function.arguments->children.size()) return false; - - size_t literal_id = 1 - func_id; - const auto * literal = function.arguments->children[literal_id]->as(); - - if (!literal || literal->value.getType() != Field::Types::UInt64) return false; - - UInt64 compare_to = literal->value.get(); - String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); - - const auto * func = function.arguments->children[func_id]->as(); - const auto * column_id = func->arguments->children.at(0)->as(); - - if (!column_id) return false; - - String column = column_id->name(); - - const auto new_ast = generateOptimizedDateFilterAST(comparator, func->name, column, compare_to); - - if (!new_ast) return false; - - ast = new_ast; - return true; -} - -void OptimizeDateFilterInPlaceData::visit(ASTFunction & function, ASTPtr & ast) const -{ - rewritePredicateInPlace(function, ast); -} -} diff --git a/src/Interpreters/OptimizeDateFilterVisitor.h b/src/Interpreters/OptimizeDateFilterVisitor.h deleted file mode 100644 index 84394372901..00000000000 --- a/src/Interpreters/OptimizeDateFilterVisitor.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class ASTFunction; - -/// Rewrite the predicates in place -class OptimizeDateFilterInPlaceData -{ -public: - using TypeToVisit = ASTFunction; - void visit(ASTFunction & function, ASTPtr & ast) const; -}; - -using OptimizeDateFilterInPlaceMatcher = OneTypeMatcher; -using OptimizeDateFilterInPlaceVisitor = InDepthNodeVisitor; -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 825114b20b7..c38b3c79026 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -678,21 +677,6 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } -void optimizeDateFilters(ASTSelectQuery * select_query) -{ - /// Predicates in HAVING clause has been moved to WHERE clause. - if (select_query->where()) - { - OptimizeDateFilterInPlaceVisitor::Data data; - OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refWhere()); - } - if (select_query->prewhere()) - { - OptimizeDateFilterInPlaceVisitor::Data data; - OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refPrewhere()); - } -} - void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -796,9 +780,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } - /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc. - optimizeDateFilters(select_query); - /// GROUP BY injective function elimination. 
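Editor's note: for reference, the visitor deleted above rewrote predicates such as `toYear(d) = 2023` or `toYYYYMM(d) = 202402` into plain date-range comparisons. A standalone sketch of the range computation it performed, simplified and without DateLUT; not the original code:

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>

// Simplified re-implementation of the removed rewrite; assumes a valid 4-digit year
// and a month in [1, 12].
static bool isLeapYear(uint64_t year)
{
    return (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
}

// toYear(d) = Y  ->  d >= 'Y-01-01' AND d <= 'Y-12-31'
std::pair<std::string, std::string> yearRange(uint64_t year)
{
    char from[11], to[11];
    std::snprintf(from, sizeof(from), "%04lu-01-01", static_cast<unsigned long>(year));
    std::snprintf(to, sizeof(to), "%04lu-12-31", static_cast<unsigned long>(year));
    return {from, to};
}

// toYYYYMM(d) = YYYYMM  ->  d between the first and the last day of that month
std::pair<std::string, std::string> monthRange(uint64_t yyyymm)
{
    static constexpr uint8_t days_of_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    const uint64_t year = yyyymm / 100;
    const uint64_t month = yyyymm % 100;
    const uint64_t last_day = days_of_month[month - 1] + (month == 2 && isLeapYear(year) ? 1 : 0);
    char from[11], to[11];
    std::snprintf(from, sizeof(from), "%04lu-%02lu-01", static_cast<unsigned long>(year), static_cast<unsigned long>(month));
    std::snprintf(to, sizeof(to), "%04lu-%02lu-%02lu", static_cast<unsigned long>(year), static_cast<unsigned long>(month), static_cast<unsigned long>(last_day));
    return {from, to};
}

int main()
{
    auto [a, b] = monthRange(202402);
    std::printf("toYYYYMM(d) = 202402  ->  d >= '%s' AND d <= '%s'\n", a.c_str(), b.c_str());
}
```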
optimizeGroupBy(select_query, context); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index dc61e748db6..3e8fab80aaf 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -534,7 +534,7 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) { bool is_null = from_value.isNull(); - if (is_null && !to_type.isNullable()) + if (is_null && !to_type.isNullable() && !to_type.isLowCardinalityNullable()) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName()); Field converted = convertFieldToType(from_value, to_type, from_type_hint); diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index f7324bfafe6..eb90b61cf31 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -44,6 +44,9 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & auto future = task->get_future(); + /// ThreadPool is using "bigger is higher priority" instead of "smaller is higher priority". + /// Note: calling the method scheduleOrThrowOnError is intentional, because we don't want to throw exceptions + /// in critical places where this callback runner is used (e.g. loading or deletion of parts) my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority); return future; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 4f091f73187..9672738ae6b 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -170,7 +170,7 @@ bool applyTrivialCountIfPossible( QueryPlan & query_plan, const TableNode & table_node, const QueryTreeNodePtr & query_tree, - const ContextPtr & query_context, + ContextMutablePtr & query_context, const Names & columns_names) { const auto & settings = query_context->getSettingsRef(); @@ -208,8 +208,7 @@ bool applyTrivialCountIfPossible( if (storage->hasLightweightDeletedMask()) return false; - if (settings.max_parallel_replicas > 1 || - settings.allow_experimental_query_deduplication + if (settings.allow_experimental_query_deduplication || settings.empty_result_for_aggregation_by_empty_set) return false; @@ -228,6 +227,18 @@ bool applyTrivialCountIfPossible( if (!num_rows) return false; + if (settings.max_parallel_replicas > 1) + { + if (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas == 0) + return false; + + /// The query could use trivial count if it didn't use parallel replicas, so let's disable it + query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + query_context->setSetting("max_parallel_replicas", UInt64{0}); + LOG_TRACE(&Poco::Logger::get("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); + + } + /// Set aggregation state const AggregateFunctionCount & agg_count = *count_func; std::vector state(agg_count.sizeOfData()); @@ -619,7 +630,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres is_single_table_expression && table_node && select_query_info.has_aggregates && - applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getQueryContext(), table_expression_data.getColumnNames()); +
applyTrivialCountIfPossible(query_plan, *table_node, select_query_info.query_tree, planner_context->getMutableQueryContext(), table_expression_data.getColumnNames()); if (is_trivial_count_applied) { diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index c2602a4d1d5..60e541a0109 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -176,13 +176,16 @@ static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::No { static constexpr size_t field_type_size = sizeof(typename DecimalType::FieldType); decoder.decodeString(tmp); - if (tmp.size() != field_type_size) + if (tmp.size() > field_type_size) throw ParsingException( ErrorCodes::CANNOT_PARSE_UUID, - "Cannot parse type {}, expected binary data with size {}, got {}", + "Cannot parse type {}, expected binary data with size equal to or less than {}, got {}", target_type->getName(), field_type_size, tmp.size()); + else if (tmp.size() != field_type_size) + /// Add padding with 0-bytes. + tmp = std::string(field_type_size - tmp.size(), '\0') + tmp; typename DecimalType::FieldType field; ReadBufferFromString buf(tmp); @@ -256,8 +259,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro if (tmp.length() != 36) throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp); - UUID uuid; - parseUUID(reinterpret_cast(tmp.data()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + const UUID uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); assert_cast(column).insertValue(uuid); return true; }; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index c743b2c1766..f0985e7cffc 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -329,9 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & uuid = assert_cast(column).getElement(row_num); - std::array s; - formatUUID(std::reverse_iterator(reinterpret_cast(&uuid) + 16), s.data()); - encoder.encodeBytes(reinterpret_cast(s.data()), s.size()); + const auto serialized_uuid = formatUUID(uuid); + encoder.encodeBytes(reinterpret_cast(serialized_uuid.data()), serialized_uuid.size()); }}; } case TypeIndex::Array: diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 936182f8c00..9796e696f6c 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -126,7 +126,9 @@ std::pair, std::vector> split(RangesInDat return marks_in_current_layer < intersected_parts * 2; }; - result_layers.emplace_back(); + auto & current_layer = result_layers.emplace_back(); + /// Map part_idx into index inside layer, used to merge marks from the same part into one reader + std::unordered_map part_idx_in_layer; while (rows_in_current_layer < rows_per_layer || layers_intersection_is_too_big() || result_layers.size() == max_layers) { @@ -140,11 +142,16 @@ std::pair, std::vector> split(RangesInDat if (current.event == PartsRangesIterator::EventType::RangeEnd) { - result_layers.back().emplace_back( - parts[part_idx].data_part, - parts[part_idx].alter_conversions, - parts[part_idx].part_index_in_query, - MarkRanges{{current_part_range_begin[part_idx], 
current.range.end}}); + const auto & mark = MarkRange{current_part_range_begin[part_idx], current.range.end}; + auto it = part_idx_in_layer.emplace(std::make_pair(part_idx, current_layer.size())); + if (it.second) + current_layer.emplace_back( + parts[part_idx].data_part, + parts[part_idx].alter_conversions, + parts[part_idx].part_index_in_query, + MarkRanges{mark}); + else + current_layer[it.first->second].ranges.push_back(mark); current_part_range_begin.erase(part_idx); current_part_range_end.erase(part_idx); @@ -170,11 +177,17 @@ std::pair, std::vector> split(RangesInDat } for (const auto & [part_idx, last_mark] : current_part_range_end) { - result_layers.back().emplace_back( - parts[part_idx].data_part, - parts[part_idx].alter_conversions, - parts[part_idx].part_index_in_query, - MarkRanges{{current_part_range_begin[part_idx], last_mark + 1}}); + const auto & mark = MarkRange{current_part_range_begin[part_idx], last_mark + 1}; + auto it = part_idx_in_layer.emplace(std::make_pair(part_idx, current_layer.size())); + + if (it.second) + result_layers.back().emplace_back( + parts[part_idx].data_part, + parts[part_idx].alter_conversions, + parts[part_idx].part_index_in_query, + MarkRanges{mark}); + else + current_layer[it.first->second].ranges.push_back(mark); current_part_range_begin[part_idx] = current_part_range_end[part_idx]; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index cfc3ff58f81..30776a8bc50 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -202,6 +202,13 @@ bool DataPartStorageOnDiskBase::isStoredOnRemoteDisk() const return volume->getDisk()->isRemote(); } +std::optional DataPartStorageOnDiskBase::getCacheName() const +{ + if (volume->getDisk()->supportsCache()) + return volume->getDisk()->getCacheName(); + return std::nullopt; +} + bool DataPartStorageOnDiskBase::supportZeroCopyReplication() const { return volume->getDisk()->supportZeroCopyReplication(); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 6b27b7296fc..043953eb20c 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -36,6 +36,7 @@ public: std::string getDiskName() const override; std::string getDiskType() const override; bool isStoredOnRemoteDisk() const override; + std::optional getCacheName() const override; bool supportZeroCopyReplication() const override; bool supportParallelWrite() const override; bool isBroken() const override; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index f160254350d..933c9bd9958 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -149,6 +149,7 @@ public: virtual std::string getDiskName() const = 0; virtual std::string getDiskType() const = 0; virtual bool isStoredOnRemoteDisk() const { return false; } + virtual std::optional getCacheName() const { return std::nullopt; } virtual bool supportZeroCopyReplication() const { return false; } virtual bool supportParallelWrite() const = 0; virtual bool isBroken() const = 0; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 32665429051..e806e1bb93f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -130,10 +130,6 @@ namespace ProfileEvents 
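Editor's note: the PartsSplitter change above merges all mark ranges that belong to the same part within a layer instead of emitting one reader per range, using a map from part index to the part's position in the layer. A minimal sketch of that grouping pattern with generic containers (not the RangesInDataParts types):

```cpp
#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <utility>
#include <vector>

// Illustrative stand-ins for parts and mark ranges.
struct MarkRange { size_t begin = 0; size_t end = 0; };

struct PartWithRanges
{
    size_t part_idx = 0;
    std::vector<MarkRange> ranges;
};

int main()
{
    // Ranges arrive interleaved across parts while sweeping layer boundaries.
    const std::vector<std::pair<size_t, MarkRange>> incoming = {
        {0, {0, 8}}, {1, {0, 4}}, {0, {8, 16}}, {2, {3, 5}}, {1, {4, 6}},
    };

    std::vector<PartWithRanges> layer;
    std::unordered_map<size_t, size_t> part_idx_in_layer;  // part index -> position in `layer`

    for (const auto & [part_idx, range] : incoming)
    {
        auto [it, inserted] = part_idx_in_layer.emplace(part_idx, layer.size());
        if (inserted)
            layer.push_back({part_idx, {range}});        // first range of this part in the layer
        else
            layer[it->second].ranges.push_back(range);   // append to the existing entry
    }

    for (const auto & part : layer)
        std::cout << "part " << part.part_idx << " gets " << part.ranges.size() << " range(s)\n";
}
```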
namespace CurrentMetrics { extern const Metric DelayedInserts; - extern const Metric MergeTreePartsLoaderThreads; - extern const Metric MergeTreePartsLoaderThreadsActive; - extern const Metric MergeTreePartsCleanerThreads; - extern const Metric MergeTreePartsCleanerThreadsActive; } @@ -1425,71 +1421,17 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( UNREACHABLE(); } -std::vector MergeTreeData::loadDataPartsFromDisk( - ThreadPool & pool, - size_t num_parts, - std::queue & parts_queue, - const MergeTreeSettingsPtr & settings) +std::vector MergeTreeData::loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load) { - /// Parallel loading of data parts. - pool.setMaxThreads(std::min(static_cast(settings->max_part_loading_threads), num_parts)); - size_t num_threads = pool.getMaxThreads(); - LOG_DEBUG(log, "Going to use {} threads to load parts", num_threads); + const size_t num_parts = parts_to_load.size(); - std::vector parts_per_thread(num_threads, num_parts / num_threads); - for (size_t i = 0ul; i < num_parts % num_threads; ++i) - ++parts_per_thread[i]; + LOG_DEBUG(log, "Will load {} number of parts using {} threads", num_parts, getActivePartsLoadingThreadPool().get().getMaxThreads()); - /// Prepare data parts for parallel loading. Threads will focus on given disk first, then steal - /// others' tasks when finish current disk part loading process. - std::vector threads_parts(num_threads); - std::set remaining_thread_parts; - std::queue threads_queue; + /// Shuffle all the parts randomly to possible speed up loading them from JBOD. + std::shuffle(parts_to_load.begin(), parts_to_load.end(), thread_local_rng); - for (size_t i = 0; i < num_threads; ++i) - { - remaining_thread_parts.insert(i); - threads_queue.push(i); - } - - while (!parts_queue.empty()) - { - assert(!threads_queue.empty()); - size_t i = threads_queue.front(); - auto & need_parts = parts_per_thread[i]; - assert(need_parts > 0); - - auto & thread_parts = threads_parts[i]; - auto & current_parts = parts_queue.front(); - assert(!current_parts.empty()); - - auto parts_to_grab = std::min(need_parts, current_parts.size()); - thread_parts.insert(thread_parts.end(), current_parts.end() - parts_to_grab, current_parts.end()); - current_parts.resize(current_parts.size() - parts_to_grab); - need_parts -= parts_to_grab; - - /// Before processing next thread, change disk if possible. - /// Different threads will likely start loading parts from different disk, - /// which may improve read parallelism for JBOD. - - /// If current disk still has some parts, push it to the tail. - if (!current_parts.empty()) - parts_queue.push(std::move(current_parts)); - - parts_queue.pop(); - - /// If current thread still want some parts, push it to the tail. 
- if (need_parts > 0) - threads_queue.push(i); - - threads_queue.pop(); - } - - assert(threads_queue.empty()); - assert(std::all_of(threads_parts.begin(), threads_parts.end(), [](const auto & parts) - { - return !parts.empty(); - })); + auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + std::vector> parts_futures; std::mutex part_select_mutex; std::mutex part_loading_mutex; @@ -1498,81 +1440,77 @@ std::vector MergeTreeData::loadDataPartsFromDisk( try { - for (size_t thread = 0; thread < num_threads; ++thread) + while (true) { - pool.scheduleOrThrowOnError([&, thread, thread_group = CurrentThread::getGroup()] + bool are_parts_to_load_empty = false; { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); + std::lock_guard lock(part_select_mutex); + are_parts_to_load_empty = parts_to_load.empty(); + } - while (true) + if (are_parts_to_load_empty) + { + /// Wait for all scheduled tasks. + /// We have to use .get() method to rethrow any exception that could occur. + for (auto & future: parts_futures) + future.get(); + parts_futures.clear(); + /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), + /// because we added them from inside the pool. + /// So we need to recheck it. + } + + PartLoadingTree::NodePtr current_part; + { + std::lock_guard lock(part_select_mutex); + if (parts_to_load.empty()) + break; + + current_part = parts_to_load.back(); + parts_to_load.pop_back(); + } + + parts_futures.push_back(runner( + [&, part = std::move(current_part)]() { - PartLoadingTree::NodePtr thread_part; - size_t thread_idx = thread; - - { - std::lock_guard lock{part_select_mutex}; - - if (remaining_thread_parts.empty()) - return; - - /// Steal task if nothing to do - if (threads_parts[thread].empty()) - { - // Try random steal tasks from the next thread - std::uniform_int_distribution distribution(0, remaining_thread_parts.size() - 1); - auto it = remaining_thread_parts.begin(); - std::advance(it, distribution(thread_local_rng)); - thread_idx = *it; - } - - auto & thread_parts = threads_parts[thread_idx]; - thread_part = thread_parts.back(); - thread_parts.pop_back(); - if (thread_parts.empty()) - remaining_thread_parts.erase(thread_idx); - } - /// Pass a separate mutex to guard the set of parts, because this lambda /// is called concurrently but with already locked @data_parts_mutex. auto res = loadDataPartWithRetries( - thread_part->info, thread_part->name, thread_part->disk, + part->info, part->name, part->disk, DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, loading_parts_max_backoff_ms, loading_parts_max_tries); - thread_part->is_loaded = true; + part->is_loaded = true; bool is_active_part = res.part->getState() == DataPartState::Active; /// If part is broken or duplicate or should be removed according to transaction /// and it has any covered parts then try to load them to replace this part. 
- if (!is_active_part && !thread_part->children.empty()) + if (!is_active_part && !part->children.empty()) { std::lock_guard lock{part_select_mutex}; - for (const auto & [_, node] : thread_part->children) - threads_parts[thread].push_back(node); - remaining_thread_parts.insert(thread); + for (const auto & [_, node] : part->children) + parts_to_load.push_back(node); } { std::lock_guard lock(part_loading_mutex); loaded_parts.push_back(std::move(res)); } - } - }); + }, Priority{0})); } } catch (...) { - /// If this is not done, then in case of an exception, tasks will be destroyed before the threads are completed, and it will be bad. - pool.wait(); + /// Wait for all scheduled tasks + /// A future becomes invalid after .get() call + /// + .wait() method is used not to throw any exception here. + for (auto & future: parts_futures) + if (future.valid()) + future.wait(); + throw; } - pool.wait(); return loaded_parts; } @@ -1679,9 +1617,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - ThreadPool pool(CurrentMetrics::MergeTreePartsLoaderThreads, CurrentMetrics::MergeTreePartsLoaderThreadsActive, disks.size()); + auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); std::vector parts_to_load_by_disk(disks.size()); + std::vector> disks_futures; + disks_futures.reserve(disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; @@ -1690,7 +1631,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto & disk_parts = parts_to_load_by_disk[i]; - pool.scheduleOrThrowOnError([&, disk_ptr]() + disks_futures.push_back(runner([&, disk_ptr]() { for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1703,38 +1644,31 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) disk_parts.emplace_back(*part_info, it->name(), disk_ptr); } - }); + }, Priority{0})); } - pool.wait(); + /// For iteration to be completed + /// Any exception will be re-thrown. + for (auto & future : disks_futures) + future.get(); + disks_futures.clear(); PartLoadingTree::PartLoadingInfos parts_to_load; for (auto & disk_parts : parts_to_load_by_disk) std::move(disk_parts.begin(), disk_parts.end(), std::back_inserter(parts_to_load)); auto loading_tree = PartLoadingTree::build(std::move(parts_to_load)); - /// Collect parts by disks' names. - std::map disk_part_map; + + size_t num_parts = 0; + PartLoadingTreeNodes active_parts; /// Collect only "the most covering" parts from the top level of the tree. 
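Editor's note: the rewritten loadDataPartsFromDisk above replaces per-thread task lists with a shared work queue drained through a callback runner; the returned futures are kept so `.get()` rethrows any task exception, and a task may push newly discovered children back into the queue. A compact sketch of that pattern, using `std::async` as a stand-in for the parts-loading thread pool:

```cpp
#include <future>
#include <iostream>
#include <mutex>
#include <vector>

int main()
{
    // Illustration only: std::async stands in for the parts-loading thread pool.
    std::mutex queue_mutex;
    std::vector<int> work = {1, 2, 3};          // initial items; tasks may add more
    std::vector<std::future<void>> futures;
    std::mutex result_mutex;
    std::vector<int> processed;

    while (true)
    {
        int item;
        {
            std::lock_guard<std::mutex> lock(queue_mutex);
            if (work.empty())
                break;                           // nothing left and no tasks in flight
            item = work.back();
            work.pop_back();
        }

        futures.push_back(std::async(std::launch::async, [&, item]
        {
            if (item == 2)                       // a task may discover follow-up work
            {
                std::lock_guard<std::mutex> lock(queue_mutex);
                work.push_back(20);
            }
            std::lock_guard<std::mutex> lock(result_mutex);
            processed.push_back(item);
        }));

        // Once the queue looks empty, wait for outstanding tasks: .get() rethrows any
        // exception thrown inside a task, unlike a bare .wait(). Tasks may have refilled
        // the queue in the meantime, so the loop re-checks it at the top.
        bool drained;
        {
            std::lock_guard<std::mutex> lock(queue_mutex);
            drained = work.empty();
        }
        if (drained)
        {
            for (auto & f : futures)
                f.get();
            futures.clear();
        }
    }

    std::cout << "processed " << processed.size() << " items\n";
}
```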
loading_tree.traverse(/*recursive=*/ false, [&](const auto & node) { - disk_part_map[node->disk->getName()].emplace_back(node); + active_parts.emplace_back(node); }); - size_t num_parts = 0; - std::queue parts_queue; - - for (auto & [disk_name, disk_parts] : disk_part_map) - { - LOG_INFO(log, "Found {} parts for disk '{}' to load", disk_parts.size(), disk_name); - - if (disk_parts.empty()) - continue; - - num_parts += disk_parts.size(); - parts_queue.push(std::move(disk_parts)); - } + num_parts += active_parts.size(); auto part_lock = lockParts(); LOG_TEST(log, "loadDataParts: clearing data_parts_indexes (had {} parts)", data_parts_indexes.size()); @@ -1754,7 +1688,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (num_parts > 0) { - auto loaded_parts = loadDataPartsFromDisk(pool, num_parts, parts_queue, settings); + auto loaded_parts = loadDataPartsFromDisk(active_parts); for (const auto & res : loaded_parts) { @@ -1783,10 +1717,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (settings->in_memory_parts_enable_wal) { - pool.setMaxThreads(disks.size()); std::vector disks_wal_parts(disks.size()); std::mutex wal_init_lock; + std::vector> wal_disks_futures; + wal_disks_futures.reserve(disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; @@ -1795,7 +1731,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto & disk_wal_parts = disks_wal_parts[i]; - pool.scheduleOrThrowOnError([&, disk_ptr]() + wal_disks_futures.push_back(runner([&, disk_ptr]() { for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1821,10 +1757,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) disk_wal_parts.push_back(std::move(part)); } } - }); + }, Priority{0})); } - pool.wait(); + /// For iteration to be completed + /// Any exception will be re-thrown. + for (auto & future : wal_disks_futures) + future.get(); + wal_disks_futures.clear(); MutableDataPartsVector parts_from_wal; for (auto & disk_wal_parts : disks_wal_parts) @@ -1925,7 +1865,7 @@ try std::atomic_size_t num_loaded_parts = 0; - auto runner = threadPoolCallbackRunner(OutdatedPartsLoadingThreadPool::get(), "OutdatedParts"); + auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); std::vector> parts_futures; while (true) { @@ -1938,8 +1878,10 @@ try if (is_async && outdated_data_parts_loading_canceled) { /// Wait for every scheduled task + /// In case of any exception it will be re-thrown and server will be terminated. for (auto & future : parts_futures) - future.wait(); + future.get(); + parts_futures.clear(); LOG_DEBUG(log, "Stopped loading outdated data parts because task was canceled. " @@ -1973,7 +1915,7 @@ try /// Wait for every scheduled task for (auto & future : parts_futures) - future.wait(); + future.get(); LOG_DEBUG(log, "Loaded {} outdated data parts {}", num_loaded_parts, is_async ? "asynchronously" : "synchronously"); @@ -1999,6 +1941,13 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ if (isStaticStorage()) return; + /// We need to load parts as fast as possible + getOutdatedPartsLoadingThreadPool().enableTurboMode(); + SCOPE_EXIT({ + /// Let's lower the number of threads e.g. 
for later ATTACH queries to behave as usual + getOutdatedPartsLoadingThreadPool().disableTurboMode(); + }); + LOG_TRACE(log, "Will wait for outdated data parts to be loaded"); std::unique_lock lock(outdated_data_parts_mutex); @@ -2420,20 +2369,15 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t } }; - if (settings->max_part_removal_threads <= 1 || parts_to_remove.size() <= settings->concurrent_part_removal_threshold) + if (parts_to_remove.size() <= settings->concurrent_part_removal_threshold) { remove_single_thread(); return; } /// Parallel parts removal. - size_t num_threads = settings->max_part_removal_threads; - if (!num_threads) - num_threads = getNumberOfPhysicalCPUCores() * 2; - num_threads = std::min(num_threads, parts_to_remove.size()); std::mutex part_names_mutex; - ThreadPool pool(CurrentMetrics::MergeTreePartsCleanerThreads, CurrentMetrics::MergeTreePartsCleanerThreadsActive, - num_threads, num_threads, /* unlimited queue size */ 0); + auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); /// This flag disallow straightforward concurrent parts removal. It's required only in case /// when we have parts on zero-copy disk + at least some of them were mutated. @@ -2453,27 +2397,27 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG( log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + std::vector> parts_to_remove_futures; + parts_to_remove_futures.reserve(parts_to_remove.size()); + for (const DataPartPtr & part : parts_to_remove) { - pool.scheduleOrThrowOnError([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] + parts_to_remove_futures.push_back(runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); - asMutableDeletingPart(part)->remove(); if (part_names_succeed) { std::lock_guard lock(part_names_mutex); part_names_succeed->insert(part->name); } - }); + }, Priority{0})); } - pool.wait(); + /// Any exception will be re-thrown. 
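Editor's note: waitForOutdatedPartsToBeLoaded() above temporarily raises the loading pool's thread count ("turbo mode") and restores it with SCOPE_EXIT. The same intent can be expressed with a small RAII guard; a sketch with assumed names, not the actual thread pool interface:

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>

// Hypothetical pool that can temporarily run with more threads.
struct AdjustablePool
{
    std::atomic<size_t> max_threads{4};
    void enableTurboMode() { max_threads = 16; }
    void disableTurboMode() { max_threads = 4; }
};

// RAII guard: turbo mode stays on only while the guard is alive, even if the
// waiting code below throws.
class TurboModeGuard
{
public:
    explicit TurboModeGuard(AdjustablePool & pool_) : pool(pool_) { pool.enableTurboMode(); }
    ~TurboModeGuard() { pool.disableTurboMode(); }

private:
    AdjustablePool & pool;
};

int main()
{
    AdjustablePool pool;
    {
        TurboModeGuard guard(pool);
        std::cout << "while waiting: " << pool.max_threads << " threads\n";
        // ... wait for outdated parts to be loaded ...
    }
    std::cout << "afterwards: " << pool.max_threads << " threads\n";
}
```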
+ for (auto & future : parts_to_remove_futures) + future.get(); + parts_to_remove_futures.clear(); + return; } @@ -2544,20 +2488,15 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t return independent_ranges; }; - auto schedule_parts_removal = [this, &pool, &part_names_mutex, part_names_succeed]( + std::vector> part_removal_futures; + + auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed, &part_removal_futures]( const MergeTreePartInfo & range, DataPartsVector && parts_in_range) { /// Below, range should be captured by copy to avoid use-after-scope on exception from pool - pool.scheduleOrThrowOnError( - [this, range, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)] + part_removal_futures.push_back(runner( + [this, range, &part_names_mutex, part_names_succeed, batch = std::move(parts_in_range)] { - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); - if (thread_group) - CurrentThread::attachToGroupIfDetached(thread_group); - LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs()); for (const auto & part : batch) @@ -2569,7 +2508,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t part_names_succeed->insert(part->name); } } - }); + }, Priority{0})); }; RemovalRanges independent_ranges = split_into_independent_ranges(parts_to_remove, /* split_times */ 0); @@ -2632,7 +2571,11 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_TRACE(log, "Will remove {} big parts separately: {}", excluded_parts.size(), fmt::join(excluded_parts, ", ")); independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0); - pool.wait(); + + /// Any exception will be re-thrown. + for (auto & future : part_removal_futures) + future.get(); + part_removal_futures.clear(); for (size_t i = 0; i < independent_ranges.infos.size(); ++i) { @@ -2641,7 +2584,10 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t schedule_parts_removal(range, std::move(parts_in_range)); } - pool.wait(); + /// Any exception will be re-thrown. 
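Editor's note: several hunks above switch from `future.wait()` to `future.get()` on the success path (so worker exceptions propagate) while keeping `wait()` inside catch blocks, where throwing again would be unsafe. A condensed illustration of that distinction, not the MergeTreeData code:

```cpp
#include <future>
#include <iostream>
#include <stdexcept>
#include <vector>

int main()
{
    std::vector<std::future<void>> futures;
    futures.push_back(std::async(std::launch::async, [] { /* succeeds */ }));
    futures.push_back(std::async(std::launch::async, [] { throw std::runtime_error("task failed"); }));

    try
    {
        // Success path: .get() rethrows the stored exception of a failed task.
        for (auto & f : futures)
            f.get();
    }
    catch (const std::exception & e)
    {
        // Error path: only wait for the rest; a future becomes invalid after .get(),
        // and we must not throw again while already handling an exception.
        for (auto & f : futures)
            if (f.valid())
                f.wait();
        std::cout << "caught: " << e.what() << "\n";
    }
}
```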
+ for (auto & future : part_removal_futures) + future.get(); + part_removal_futures.clear(); if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1c41de6fa19..2f254f9a787 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1519,11 +1519,7 @@ private: size_t max_backoff_ms, size_t max_tries); - std::vector loadDataPartsFromDisk( - ThreadPool & pool, - size_t num_parts, - std::queue & parts_queue, - const MergeTreeSettingsPtr & settings); + std::vector loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load); void loadDataPartsFromWAL(MutableDataPartsVector & parts_from_wal); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 16b27c2c820..4967de8424b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -948,25 +949,52 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd std::list useful_indices; std::map, MergedDataSkippingIndexAndCondition> merged_indices; + std::unordered_set ignored_index_names; + + if (use_skip_indexes && settings.ignore_data_skipping_indices.changed) + { + const auto & indices = settings.ignore_data_skipping_indices.toString(); + Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + Expected expected; + + /// Use an unordered list rather than string vector + auto parse_single_id_or_literal = [&] + { + String str; + if (!parseIdentifierOrStringLiteral(pos, expected, str)) + return false; + + ignored_index_names.insert(std::move(str)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_single_id_or_literal, false)) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices); + } if (use_skip_indexes) { for (const auto & index : metadata_snapshot->getSecondaryIndices()) { - auto index_helper = MergeTreeIndexFactory::instance().get(index); - if (index_helper->isMergeable()) - { - auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()}); - if (inserted) - it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); - it->second.addIndex(index_helper); - } - else + auto index_helper = MergeTreeIndexFactory::instance().get(index); + if (!ignored_index_names.contains(index.name)) { - auto condition = index_helper->createIndexCondition(query_info, context); - if (!condition->alwaysUnknownOrTrue()) - useful_indices.emplace_back(index_helper, condition); + if (index_helper->isMergeable()) + { + auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()}); + if (inserted) + it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); + + it->second.addIndex(index_helper); + } + else + { + auto condition = index_helper->createIndexCondition(query_info, context); + if (!condition->alwaysUnknownOrTrue()) + useful_indices.emplace_back(index_helper, condition); + } } } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h 
index 5ea99009756..33aea358078 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -143,8 +143,6 @@ struct Settings; M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ M(Bool, materialize_ttl_recalculate_only, false, "Only recalculate ttl info when MATERIALIZE TTL", 0) \ M(Bool, enable_mixed_granularity_parts, true, "Enable parts with adaptive and non adaptive granularity", 0) \ - M(MaxThreads, max_part_loading_threads, 0, "The number of threads to load data parts at startup.", 0) \ - M(MaxThreads, max_part_removal_threads, 0, "The number of threads for concurrent removal of inactive data parts. One is usually enough, but in 'Google Compute Environment SSD Persistent Disks' file removal (unlink) operation is extraordinarily slow and you probably have to increase this number (recommended is up to 16).", 0) \ M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \ M(UInt64, zero_copy_concurrent_part_removal_max_split_times, 5, "Max recursion depth for splitting independent Outdated parts ranges into smaller subranges (highly not recommended to change)", 0) \ M(Float, zero_copy_concurrent_part_removal_max_postpone_ratio, static_cast(0.05), "Max percentage of top level parts to postpone removal in order to get smaller independent ranges (highly not recommended to change)", 0) \ @@ -192,6 +190,9 @@ struct Settings; M(UInt64, write_ahead_log_bytes_to_fsync, 100ULL * 1024 * 1024, "Obsolete setting, does nothing.", 0) \ M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Obsolete setting, does nothing.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "Obsolete setting, does nothing.", 0) \ + M(MaxThreads, max_part_loading_threads, 0, "Obsolete setting, does nothing.", 0) \ + M(MaxThreads, max_part_removal_threads, 0, "Obsolete setting, does nothing.", 0) \ + /// Settings that should not change after the creation of a table. 
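Editor's note: the MergeTreeDataSelectExecutor change above parses the comma-separated `ignore_data_skipping_indices` setting into an unordered set of names and skips those indexes while collecting conditions. A simplified sketch of the same filtering, using plain string splitting instead of the ClickHouse parser:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

// Naive stand-in for parseIdentifierOrStringLiteral + ParserList: split on commas
// and trim whitespace. The real setting also accepts quoted identifiers.
std::unordered_set<std::string> parseIgnoredIndices(const std::string & setting)
{
    std::unordered_set<std::string> result;
    std::stringstream ss(setting);
    std::string token;
    while (std::getline(ss, token, ','))
    {
        const auto begin = token.find_first_not_of(" \t");
        const auto end = token.find_last_not_of(" \t");
        if (begin != std::string::npos)
            result.insert(token.substr(begin, end - begin + 1));
    }
    return result;
}

int main()
{
    const std::vector<std::string> table_indices = {"x_idx", "y_idx", "xy_idx"};
    const auto ignored = parseIgnoredIndices("xy_idx, na_idx");

    for (const auto & name : table_indices)
        if (!ignored.contains(name))
            std::cout << "will consider skipping index " << name << "\n";
}
```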
/// NOLINTNEXTLINE #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index b65f044a13b..69fbdd5a64d 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -105,7 +105,7 @@ struct MergeTreeSource::AsyncReadingState AsyncReadingState() { control = std::make_shared(); - callback_runner = threadPoolCallbackRunner(IOThreadPool::get(), "MergeTreeRead"); + callback_runner = threadPoolCallbackRunner(getIOThreadPool().get(), "MergeTreeRead"); } ~AsyncReadingState() diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d8aaec0f07..f1a7bcb71a2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -766,7 +766,7 @@ public: DBMS_DEFAULT_BUFFER_SIZE, configuration_.request_settings, std::nullopt, - threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelWrite"), + threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), context->getWriteSettings()), compression_method, 3); diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 9f80b994051..97af4094e42 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -194,7 +194,7 @@ private: futures.push_back( scheduleFromThreadPool( std::move(worker), - IOThreadPool::get(), + getIOThreadPool().get(), "DP_BytesOnDisk")); } diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index cbc9f4f8901..bebfb594b59 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -2,30 +2,27 @@ """The lambda to decrease/increase ASG desired capacity based on current queue""" -import json import logging -import time from dataclasses import dataclass from pprint import pformat from typing import Any, List, Literal, Optional, Tuple import boto3 # type: ignore -import requests # type: ignore -RUNNER_TYPE_LABELS = [ - "builder", - "func-tester", - "func-tester-aarch64", - "fuzzer-unit-tester", - "stress-tester", - "style-checker", - "style-checker-aarch64", -] +from lambda_shared import ( + CHException, + ClickHouseHelper, + RUNNER_TYPE_LABELS, + get_parameter_from_ssm, +) +### Update comment on the change ### # 4 HOUR - is a balance to get the most precise values # - Our longest possible running check is around 5h on the worst scenario # - The long queue won't be wiped out and replaced, so the measurmenet is fine # - If the data is spoiled by something, we are from the bills perspective +# Changed it to 3 HOUR: in average we have 1h tasks, but p90 is around 2h. +# With 4h we have too much wasted computing time in case of issues with DB QUEUE_QUERY = f"""SELECT last_status AS status, toUInt32(count()) AS length, @@ -40,7 +37,7 @@ FROM FROM default.workflow_jobs WHERE has(labels, 'self-hosted') AND hasAny({RUNNER_TYPE_LABELS}, labels) - AND started_at > now() - INTERVAL 4 HOUR + AND started_at > now() - INTERVAL 3 HOUR GROUP BY ALL HAVING last_status IN ('in_progress', 'queued') ) @@ -68,64 +65,14 @@ def get_scales(runner_type: str) -> Tuple[int, int]: # 10. I am trying 7 now. 
# UPDATE THE COMMENT ON CHANGES scale_up = 7 + elif runner_type == "limited-tester": + # The limited runners should inflate and deflate faster + scale_down = 1 + scale_up = 2 return scale_down, scale_up -### VENDORING -def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] - - -class CHException(Exception): - pass - - -class ClickHouseHelper: - def __init__( - self, - url: Optional[str] = None, - user: Optional[str] = None, - password: Optional[str] = None, - ): - self.url = url - self.auth = {} - if user: - self.auth["X-ClickHouse-User"] = user - if password: - self.auth["X-ClickHouse-Key"] = password - - def _select_and_get_json_each_row(self, db, query): - params = { - "database": db, - "query": query, - "default_format": "JSONEachRow", - } - for i in range(5): - response = None - try: - response = requests.get(self.url, params=params, headers=self.auth) - response.raise_for_status() - return response.text - except Exception as ex: - logging.warning("Cannot fetch data with exception %s", str(ex)) - if response: - logging.warning("Reponse text %s", response.text) - time.sleep(0.1 * i) - - raise CHException("Cannot fetch data from clickhouse") - - def select_json_each_row(self, db, query): - text = self._select_and_get_json_each_row(db, query) - result = [] - for line in text.split("\n"): - if line: - result.append(json.loads(line)) - return result - - -CH_CLIENT = ClickHouseHelper(get_parameter_from_ssm("clickhouse-test-stat-url"), "play") +CH_CLIENT = None # type: Optional[ClickHouseHelper] def set_capacity( @@ -170,7 +117,17 @@ def set_capacity( # Finally, should the capacity be even changed stop = stop or asg["DesiredCapacity"] == desired_capacity if stop: + logging.info( + "Do not increase ASG %s capacity, current capacity=%s, " + "maximum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["MaxSize"], + running, + queued, + ) return + logging.info( "The ASG %s capacity will be increased to %s, current capacity=%s, " "maximum capacity=%s, running jobs=%s, queue size=%s", @@ -195,6 +152,15 @@ def set_capacity( desired_capacity = min(desired_capacity, asg["MaxSize"]) stop = stop or asg["DesiredCapacity"] == desired_capacity if stop: + logging.info( + "Do not decrease ASG %s capacity, current capacity=%s, " + "minimum capacity=%s, running jobs=%s, queue size=%s", + asg["AutoScalingGroupName"], + desired_capacity, + asg["MinSize"], + running, + queued, + ) return logging.info( @@ -219,6 +185,9 @@ def main(dry_run: bool = True) -> None: asg_client = boto3.client("autoscaling") try: global CH_CLIENT + CH_CLIENT = CH_CLIENT or ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), "play" + ) queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) except CHException as ex: logging.exception( diff --git a/tests/ci/autoscale_runners_lambda_test.py b/tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py similarity index 98% rename from tests/ci/autoscale_runners_lambda_test.py rename to tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py index 8e3828f51c0..6772e33374c 100644 --- a/tests/ci/autoscale_runners_lambda_test.py +++ b/tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py @@ -4,7 +4,7 @@ import unittest from dataclasses import dataclass from typing import Any, List -from autoscale_runners_lambda.app import 
set_capacity, Queue +from app import set_capacity, Queue @dataclass diff --git a/tests/ci/autoscale_runners_lambda/lambda_shared b/tests/ci/autoscale_runners_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/autoscale_runners_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/autoscale_runners_lambda/requirements.txt b/tests/ci/autoscale_runners_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/autoscale_runners_lambda/requirements.txt +++ b/tests/ci/autoscale_runners_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 54c87fbcfa5..250655ddeb2 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -9,9 +9,10 @@ import json import re import time -import jwt import requests # type: ignore -import boto3 # type: ignore + +from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description +from lambda_shared.token import get_cached_access_token NEED_RERUN_ON_EDITED = { @@ -27,123 +28,6 @@ MAX_RETRY = 5 DEBUG_INFO = {} # type: Dict[str, Any] -# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there -# updated accordingly -# The following lists are append only, try to avoid editing them -# They still could be cleaned out after the decent time though. -LABELS = { - "pr-backward-incompatible": ["Backward Incompatible Change"], - "pr-bugfix": [ - "Bug Fix", - "Bug Fix (user-visible misbehavior in an official stable release)", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)", - "Bug Fix (user-visible misbehavior in official stable or prestable release)", - ], - "pr-build": [ - "Build/Testing/Packaging Improvement", - "Build Improvement", - "Build/Testing Improvement", - "Build", - "Packaging Improvement", - ], - "pr-documentation": [ - "Documentation (changelog entry is not required)", - "Documentation", - ], - "pr-feature": ["New Feature"], - "pr-improvement": ["Improvement"], - "pr-not-for-changelog": [ - "Not for changelog (changelog entry is not required)", - "Not for changelog", - ], - "pr-performance": ["Performance Improvement"], -} - -CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} - - -def check_pr_description(pr_body: str) -> Tuple[str, str]: - """The function checks the body to being properly formatted according to - .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, - then there is an error.""" - lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) - lines = [re.sub(r"\s+", " ", line) for line in lines] - - # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: - return "", LABELS["pr-not-for-changelog"][0] - - category = "" - entry = "" - description_error = "" - - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category - # itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - # Should not have more than one category. 
Require empty line - # after the first found category. - if i >= len(lines): - break - if lines[i]: - second_category = re.sub(r"^[-*\s]*", "", lines[i]) - description_error = ( - "More than one changelog category specified: " - f"'{category}', '{second_category}'" - ) - return description_error, category - - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - # Don't accept changelog entries like '...'. - entry = re.sub(r"[#>*_.\- ]", "", entry) - # Don't accept changelog entries like 'Close #12345'. - entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) - else: - i += 1 - - if not category: - description_error = "Changelog category is empty" - # Filter out the PR categories that are not for changelog. - elif re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - pass # to not check the rest of the conditions - elif category not in CATEGORY_TO_LABEL: - description_error, category = f"Category '{category}' is not valid", "" - elif not entry: - description_error = f"Changelog entry required for category '{category}'" - - return description_error, category - class Worker(Thread): def __init__( @@ -166,58 +50,6 @@ class Worker(Thread): self.queue.task_done() -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] - - -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def get_token_from_aws(): - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - return get_access_token(encoded_jwt, installation_id) - - def _exec_get_with_retry(url: str, token: str) -> dict: headers = {"Authorization": f"token {token}"} for i in range(MAX_RETRY): @@ -407,7 +239,7 @@ def exec_workflow_url(urls_to_post, token): def main(event): - token = get_token_from_aws() + token = get_cached_access_token() DEBUG_INFO["event"] = event if event["isBase64Encoded"]: event_data = json.loads(b64decode(event["body"])) diff 
--git a/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared b/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt +++ b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index 341e1b674ec..71a644fe072 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -8,32 +8,26 @@ Lambda function to: import argparse import sys -import json -import time -from collections import namedtuple from datetime import datetime -from typing import Dict, List, Tuple +from typing import Dict, List -import jwt import requests # type: ignore import boto3 # type: ignore from botocore.exceptions import ClientError # type: ignore -UNIVERSAL_LABEL = "universal" -RUNNER_TYPE_LABELS = [ - "builder", - "func-tester", - "func-tester-aarch64", - "fuzzer-unit-tester", - "stress-tester", - "style-checker", - "style-checker-aarch64", -] - -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +from lambda_shared import ( + RUNNER_TYPE_LABELS, + RunnerDescription, + RunnerDescriptions, + list_runners, ) -RunnerDescriptions = List[RunnerDescription] +from lambda_shared.token import ( + get_cached_access_token, + get_key_and_app_from_aws, + get_access_token_by_key_app, +) + +UNIVERSAL_LABEL = "universal" def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: @@ -105,138 +99,53 @@ def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]: client = boto3.client("ec2") reservations = client.describe_instances( - Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}] + Filters=[ + {"Name": "tag-key", "Values": ["github:runner-type"]}, + {"Name": "instance-state-name", "Values": ["pending", "running"]}, + ], )["Reservations"] - lost_instances = [] - offline_runners = [ - runner.name for runner in runners if runner.offline and not runner.busy + # flatten the reservation into instances + instances = [ + instance + for reservation in reservations + for instance in reservation["Instances"] ] - # Here we refresh the runners to get the most recent state + lost_instances = [] + offline_runner_names = { + runner.name for runner in runners if runner.offline and not runner.busy + } + runner_names = {runner.name for runner in runners} now = datetime.now().timestamp() - for reservation in reservations: - for instance in reservation["Instances"]: - # Do not consider instances started 20 minutes ago as problematic - if now - instance["LaunchTime"].timestamp() < 1200: - continue + for instance in instances: + # Do not consider instances started 20 minutes ago as problematic + if now - instance["LaunchTime"].timestamp() < 1200: + continue - runner_type = [ - tag["Value"] - for tag in instance["Tags"] - if tag["Key"] == "github:runner-type" - ][0] - # If there's no necessary labels in runner type 
it's fine - if not ( - UNIVERSAL_LABEL in runner_type or runner_type in RUNNER_TYPE_LABELS - ): - continue + runner_type = [ + tag["Value"] + for tag in instance["Tags"] + if tag["Key"] == "github:runner-type" + ][0] + # If there's no necessary labels in runner type it's fine + if not (UNIVERSAL_LABEL in runner_type or runner_type in RUNNER_TYPE_LABELS): + continue - if instance["InstanceId"] in offline_runners: - lost_instances.append(instance) - continue + if instance["InstanceId"] in offline_runner_names: + lost_instances.append(instance) + continue - if instance["State"]["Name"] == "running" and ( - not [ - runner - for runner in runners - if runner.name == instance["InstanceId"] - ] - ): - lost_instances.append(instance) + if ( + instance["State"]["Name"] == "running" + and not instance["InstanceId"] in runner_names + ): + lost_instances.append(instance) return lost_instances -def get_key_and_app_from_aws() -> Tuple[str, int]: - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - def handler(event, context): - private_key, app_id = get_key_and_app_from_aws() - main(private_key, app_id, True, True) - - -def get_installation_id(jwt_token: str) -> int: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - break - - return installation_id # type: ignore - - -def get_access_token(jwt_token: str, installation_id: int) -> str: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] # type: ignore - - -def list_runners(access_token: str) -> RunnerDescriptions: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - per_page = 100 - response = requests.get( - f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - total_runners = data["total_count"] - print("Expected total runners", total_runners) - runners = data["runners"] - - # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 - total_pages = (total_runners - 1) // per_page + 1 - - print("Total pages", total_pages) - for i in range(2, total_pages + 1): - response = requests.get( - "https://api.github.com/orgs/ClickHouse/actions/runners" - f"?page={i}&per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - runners += data["runners"] - - print("Total runners", len(runners)) - result = [] - for runner in runners: - tags = [tag["name"] for tag in runner["labels"]] - desc = RunnerDescription( - id=runner["id"], - name=runner["name"], - tags=tags, - offline=runner["status"] == "offline", - busy=runner["busy"], - ) - result.append(desc) - - return result + 
main(get_cached_access_token(), True, True) def group_runners_by_tag( @@ -265,18 +174,21 @@ def group_runners_by_tag( def push_metrics_to_cloudwatch( - listed_runners: RunnerDescriptions, namespace: str + listed_runners: RunnerDescriptions, group_name: str ) -> None: client = boto3.client("cloudwatch") + namespace = "RunnersMetrics" metrics_data = [] busy_runners = sum( 1 for runner in listed_runners if runner.busy and not runner.offline ) + dimensions = [{"Name": "group", "Value": group_name}] metrics_data.append( { "MetricName": "BusyRunners", "Value": busy_runners, "Unit": "Count", + "Dimensions": dimensions, } ) total_active_runners = sum(1 for runner in listed_runners if not runner.offline) @@ -285,6 +197,7 @@ def push_metrics_to_cloudwatch( "MetricName": "ActiveRunners", "Value": total_active_runners, "Unit": "Count", + "Dimensions": dimensions, } ) total_runners = len(listed_runners) @@ -293,6 +206,7 @@ def push_metrics_to_cloudwatch( "MetricName": "TotalRunners", "Value": total_runners, "Unit": "Count", + "Dimensions": dimensions, } ) if total_active_runners == 0: @@ -305,6 +219,7 @@ def push_metrics_to_cloudwatch( "MetricName": "BusyRunnersRatio", "Value": busy_ratio, "Unit": "Percent", + "Dimensions": dimensions, } ) @@ -327,26 +242,16 @@ def delete_runner(access_token: str, runner: RunnerDescription) -> bool: def main( - github_secret_key: str, - github_app_id: int, + access_token: str, push_to_cloudwatch: bool, delete_offline_runners: bool, ) -> None: - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": github_app_id, - } - - encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, installation_id) gh_runners = list_runners(access_token) grouped_runners = group_runners_by_tag(gh_runners) for group, group_runners in grouped_runners.items(): if push_to_cloudwatch: print(f"Pushing metrics for group '{group}'") - push_metrics_to_cloudwatch(group_runners, "RunnersMetrics/" + group) + push_metrics_to_cloudwatch(group_runners, group) else: print(group, f"({len(group_runners)})") for runner in group_runners: @@ -408,4 +313,6 @@ if __name__ == "__main__": print("Attempt to get key and id from AWS secret manager") private_key, args.app_id = get_key_and_app_from_aws() - main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline) + token = get_access_token_by_key_app(private_key, args.app_id) + + main(token, args.push_to_cloudwatch, args.delete_offline) diff --git a/tests/ci/ci_runners_metrics_lambda/lambda_shared b/tests/ci/ci_runners_metrics_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/ci_runners_metrics_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/ci_runners_metrics_lambda/requirements.txt b/tests/ci/ci_runners_metrics_lambda/requirements.txt index 98be09ab232..e2b16067a93 100644 --- a/tests/ci/ci_runners_metrics_lambda/requirements.txt +++ b/tests/ci/ci_runners_metrics_lambda/requirements.txt @@ -1,3 +1,2 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package +../lambda_shared_package[token] diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 9d35d2d6e35..c21fafa2605 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -25,6 +25,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import 
upload_results from version_helper import get_version_from_repo +from build_check import get_release_or_pr JEPSEN_GROUP_NAME = "jepsen_group" @@ -210,12 +211,7 @@ if __name__ == "__main__": build_name = get_build_name_for_check(check_name) - if pr_info.number == 0: - version = get_version_from_repo() - release_or_pr = f"{version.major}.{version.minor}" - else: - # PR number for anything else - release_or_pr = str(pr_info.number) + release_or_pr, _ = get_release_or_pr(pr_info, get_version_from_repo()) # This check run separately from other checks because it requires exclusive # run (see .github/workflows/jepsen.yml) So we cannot add explicit diff --git a/tests/ci/lambda_shared_package/.gitignore b/tests/ci/lambda_shared_package/.gitignore new file mode 100644 index 00000000000..59d52651e06 --- /dev/null +++ b/tests/ci/lambda_shared_package/.gitignore @@ -0,0 +1,2 @@ +build +*.egg-info diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/__init__.py b/tests/ci/lambda_shared_package/__init__.py similarity index 100% rename from tests/ci/cancel_and_rerun_workflow_lambda/__init__.py rename to tests/ci/lambda_shared_package/__init__.py diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py new file mode 100644 index 00000000000..c56994cc86a --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -0,0 +1,221 @@ +"""The shared code and types for all our CI lambdas +It exists as __init__.py and lambda_shared/__init__.py to work both in local and venv""" + +import json +import logging +import time +from collections import namedtuple +from typing import Any, Dict, Iterable, List, Optional + +import boto3 # type: ignore +import requests # type: ignore + +RUNNER_TYPE_LABELS = [ + "builder", + "func-tester", + "func-tester-aarch64", + "fuzzer-unit-tester", + "limited-tester", + "stress-tester", + "style-checker", + "style-checker-aarch64", +] + + +### VENDORING +def get_parameter_from_ssm( + name: str, decrypt: bool = True, client: Optional[Any] = None +) -> str: + if not client: + client = boto3.client("ssm", region_name="us-east-1") + return client.get_parameter(Name=name, WithDecryption=decrypt)[ # type: ignore + "Parameter" + ]["Value"] + + +class CHException(Exception): + pass + + +class InsertException(CHException): + pass + + +class ClickHouseHelper: + def __init__( + self, + url: str, + user: Optional[str] = None, + password: Optional[str] = None, + ): + self.url = url + self.auth = {} + if user: + self.auth["X-ClickHouse-User"] = user + if password: + self.auth["X-ClickHouse-Key"] = password + + @staticmethod + def _insert_json_str_info_impl( + url: str, auth: Dict[str, str], db: str, table: str, json_str: str + ) -> None: + params = { + "database": db, + "query": f"INSERT INTO {table} FORMAT JSONEachRow", + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + + for i in range(5): + try: + response = requests.post( + url, params=params, data=json_str, headers=auth + ) + except Exception as e: + error = f"Received exception while sending data to {url} on {i} attempt: {e}" + logging.warning(error) + continue + + logging.info("Response content '%s'", response.content) + + if response.ok: + break + + error = ( + "Cannot insert data into clickhouse at try " + + str(i) + + ": HTTP code " + + str(response.status_code) + + ": '" + + str(response.text) + + "'" + ) + + if response.status_code >= 500: + # A retriable error + time.sleep(1) + continue + + logging.info( + "Request 
headers '%s', body '%s'", + response.request.headers, + response.request.body, + ) + + raise InsertException(error) + else: + raise InsertException(error) + + def _insert_json_str_info(self, db: str, table: str, json_str: str) -> None: + self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + + def insert_event_into( + self, db: str, table: str, event: object, safe: bool = True + ) -> None: + event_str = json.dumps(event) + try: + self._insert_json_str_info(db, table, event_str) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise + + def insert_events_into( + self, db: str, table: str, events: Iterable[object], safe: bool = True + ) -> None: + jsons = [] + for event in events: + jsons.append(json.dumps(event)) + + try: + self._insert_json_str_info(db, table, ",".join(jsons)) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise + + def _select_and_get_json_each_row(self, db: str, query: str) -> str: + params = { + "database": db, + "query": query, + "default_format": "JSONEachRow", + } + for i in range(5): + response = None + try: + response = requests.get(self.url, params=params, headers=self.auth) + response.raise_for_status() + return response.text # type: ignore + except Exception as ex: + logging.warning("Cannot fetch data with exception %s", str(ex)) + if response: + logging.warning("Reponse text %s", response.text) + time.sleep(0.1 * i) + + raise CHException("Cannot fetch data from clickhouse") + + def select_json_each_row(self, db: str, query: str) -> List[dict]: + text = self._select_and_get_json_each_row(db, query) + result = [] + for line in text.split("\n"): + if line: + result.append(json.loads(line)) + return result + + +### Runners + +RunnerDescription = namedtuple( + "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +) +RunnerDescriptions = List[RunnerDescription] + + +def list_runners(access_token: str) -> RunnerDescriptions: + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + per_page = 100 + response = requests.get( + f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", + headers=headers, + ) + response.raise_for_status() + data = response.json() + total_runners = data["total_count"] + print("Expected total runners", total_runners) + runners = data["runners"] + + # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 + total_pages = (total_runners - 1) // per_page + 1 + + print("Total pages", total_pages) + for i in range(2, total_pages + 1): + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners" + f"?page={i}&per_page={per_page}", + headers=headers, + ) + response.raise_for_status() + data = response.json() + runners += data["runners"] + + print("Total runners", len(runners)) + result = [] + for runner in runners: + tags = [tag["name"] for tag in runner["labels"]] + desc = RunnerDescription( + id=runner["id"], + name=runner["name"], + tags=tags, + offline=runner["status"] == "offline", + busy=runner["busy"], + ) + result.append(desc) + + return result diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py new file mode 100644 index 00000000000..ef47eacc082 --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +import re +from 
typing import Tuple + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "amosbird", + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloudFlare + "BohuTANG", + "codyrobert", # Flickerbox engineer + "cwurm", # Employee + "damozhaeva", # DOCSUP + "den-crane", + "flickerbox-tom", # Flickerbox + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "taiyang-li", + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka + "thomoco", # ClickHouse + "BoloniniD", # Seasoned contributor, HSE + "tonickkozlov", # Cloudflare + "tylerhannan", # ClickHouse Employee + "myrrc", # Mike Kot, DoubleCloud + "thevar1able", # ClickHouse Employee + "aalexfvk", + "MikhailBurdukov", + "tsolodov", # ClickHouse Employee + "kitaisreal", + ] +} + +# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there +# updated accordingly +# The following lists are append only, try to avoid editing them +# They still could be cleaned out after the decent time though. 
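As a rough, hypothetical usage sketch (the helper name `label_for_pr` is invented; only the imported symbols exist in `lambda_shared.pr`), a consumer lambda could combine the trusted-contributors set above with the `CATEGORY_TO_LABEL` mapping and `check_pr_description` helper that are defined next:

```python
# Illustrative only - not part of the patch. Assumes it runs next to the
# lambda_shared symlink, the same way the real lambdas import the package.
from lambda_shared.pr import (
    CATEGORY_TO_LABEL,
    TRUSTED_CONTRIBUTORS,
    check_pr_description,
)


def label_for_pr(pr_body: str, author_login: str) -> str:
    """Return the changelog label for a PR body, or raise on a malformed description."""
    if author_login.lower() not in TRUSTED_CONTRIBUTORS:
        # The real checks also consult organization membership (see run_check.py).
        print(f"{author_login} is not in the static trusted list")
    error, category = check_pr_description(pr_body)
    if error:
        raise ValueError(error)
    # Free-form categories that only pass the doc/not-for-changelog filter
    # may not be exact keys of the mapping, so fall back to an empty label.
    return CATEGORY_TO_LABEL.get(category, "")
```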
+LABELS = { + "pr-backward-incompatible": ["Backward Incompatible Change"], + "pr-bugfix": [ + "Bug Fix", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", + ], + "pr-build": [ + "Build/Testing/Packaging Improvement", + "Build Improvement", + "Build/Testing Improvement", + "Build", + "Packaging Improvement", + ], + "pr-documentation": [ + "Documentation (changelog entry is not required)", + "Documentation", + ], + "pr-feature": ["New Feature"], + "pr-improvement": ["Improvement"], + "pr-not-for-changelog": [ + "Not for changelog (changelog entry is not required)", + "Not for changelog", + ], + "pr-performance": ["Performance Improvement"], +} + +CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} + + +def check_pr_description(pr_body: str) -> Tuple[str, str]: + """The function checks the body to being properly formatted according to + .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, + then there is an error.""" + lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + # Check if body contains "Reverts ClickHouse/ClickHouse#36337" + if [ + True + for line in lines + if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) + ]: + return "", LABELS["pr-not-for-changelog"][0] + + category = "" + entry = "" + description_error = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + description_error = ( + "More than one changelog category specified: " + f"'{category}', '{second_category}'" + ) + return description_error, category + + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + # Don't accept changelog entries like 'Close #12345'. + entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) + else: + i += 1 + + if not category: + description_error = "Changelog category is empty" + # Filter out the PR categories that are not for changelog. 
+ elif re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + pass # to not check the rest of the conditions + elif category not in CATEGORY_TO_LABEL: + description_error, category = f"Category '{category}' is not valid", "" + elif not entry: + description_error = f"Changelog entry required for category '{category}'" + + return description_error, category diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py new file mode 100644 index 00000000000..174ea4625a3 --- /dev/null +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -0,0 +1,90 @@ +"""Module to get the token for GitHub""" +from dataclasses import dataclass +import json +import time +from typing import Tuple + +import boto3 # type: ignore +import jwt +import requests # type: ignore + + +def get_key_and_app_from_aws() -> Tuple[str, int]: + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name="secretsmanager", + ) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + data = json.loads(get_secret_value_response["SecretString"]) + return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) + + +def get_installation_id(jwt_token: str) -> int: + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + for installation in data: + if installation["account"]["login"] == "ClickHouse": + installation_id = installation["id"] + + return installation_id # type: ignore + + +def get_access_token_by_jwt(jwt_token: str, installation_id: int) -> str: + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post( + f"https://api.github.com/app/installations/{installation_id}/access_tokens", + headers=headers, + ) + response.raise_for_status() + data = response.json() + return data["token"] # type: ignore + + +def get_token_from_aws() -> str: + private_key, app_id = get_key_and_app_from_aws() + return get_access_token_by_key_app(private_key, app_id) + + +def get_access_token_by_key_app(private_key: str, app_id: int) -> str: + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token_by_jwt(encoded_jwt, installation_id) + + +@dataclass +class CachedToken: + time: int + value: str + updating: bool = False + + +_cached_token = CachedToken(0, "") + + +def get_cached_access_token() -> str: + if time.time() - 550 < _cached_token.time or _cached_token.updating: + return _cached_token.value + # Indicate that the value is updating now, so the cached value can be + # used. 
The first setting and close-to-ttl are not counted as update + if _cached_token.time != 0 or time.time() - 590 < _cached_token.time: + _cached_token.updating = True + private_key, app_id = get_key_and_app_from_aws() + _cached_token.time = int(time.time()) + _cached_token.value = get_access_token_by_key_app(private_key, app_id) + _cached_token.updating = False + return _cached_token.value diff --git a/tests/ci/lambda_shared_package/pyproject.toml b/tests/ci/lambda_shared_package/pyproject.toml new file mode 100644 index 00000000000..dff36b89fbb --- /dev/null +++ b/tests/ci/lambda_shared_package/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "lambda_shared" +version = "0.0.1" +dependencies = [ + "requests", + "urllib3 < 2" +] + +[project.optional-dependencies] +token = [ + "PyJWT", + "cryptography", +] +dev = [ + "boto3", + "lambda_shared[token]", +] + +[tool.distutils.bdist_wheel] +universal = true diff --git a/tests/ci/lambda_shared_package/setup.cfg b/tests/ci/lambda_shared_package/setup.cfg new file mode 100644 index 00000000000..744280ae41b --- /dev/null +++ b/tests/ci/lambda_shared_package/setup.cfg @@ -0,0 +1,8 @@ +### This file exists for clear builds in docker ### +# without it the `build` directory wouldn't be # +# updated on the fly and will require manual clean # +[build] +build_base = /tmp/lambda_shared + +[egg_info] +egg_base = /tmp/ diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index bf5704f31bd..41ace95c350 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -219,6 +219,12 @@ if __name__ == "__main__": except Exception: traceback.print_exc() + def too_many_slow(msg): + match = re.search(r"(|.* )(\d+) slower.*", msg) + # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 + threshold = 5 + return int(match.group(2).strip()) > threshold if match else False + # Try to fetch status from the report. 
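A quick illustration of the new threshold guard; the sample messages below are invented, while the regex and the threshold of 5 mirror the `too_many_slow` helper added above:

```python
# Illustrative only: exercises the same pattern the check uses to pull the
# trailing "<N> slower" counter out of the report status message.
import re

PATTERN = r"(|.* )(\d+) slower.*"

for msg, expect_failure in [
    ("4 faster, 6 slower", True),    # 6 > 5  -> flips the status to "failure"
    ("3 slower", False),             # at or below the threshold is tolerated
    ("2 errors, 1 faster", False),   # no "slower" counter at all
]:
    match = re.search(PATTERN, msg)
    slower = int(match.group(2)) if match else 0
    assert (slower > 5) == expect_failure
```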
status = "" message = "" @@ -236,7 +242,7 @@ if __name__ == "__main__": # TODO: Remove me, always green mode for the first time, unless errors status = "success" - if "errors" in message.lower(): + if "errors" in message.lower() or too_many_slow(message.lower()): status = "failure" # TODO: Remove until here except Exception: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 9849f19a1e4..330a1309016 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -20,9 +20,11 @@ from docs_check import NAME as DOCS_NAME from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo - -from cancel_and_rerun_workflow_lambda.app import CATEGORY_TO_LABEL, check_pr_description -from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS +from lambda_shared_package.lambda_shared.pr import ( + CATEGORY_TO_LABEL, + TRUSTED_CONTRIBUTORS, + check_pr_description, +) TRUSTED_ORG_IDS = { 54801242, # clickhouse diff --git a/tests/ci/runner_token_rotation_lambda/__init__.py b/tests/ci/runner_token_rotation_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py index 70ee5da01f4..6544eee9581 100644 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ b/tests/ci/runner_token_rotation_lambda/app.py @@ -2,40 +2,11 @@ import argparse import sys -import json -import time import boto3 # type: ignore -import jwt import requests # type: ignore - -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] +from lambda_shared.token import get_cached_access_token, get_access_token_by_key_app def get_runner_registration_token(access_token): @@ -52,32 +23,10 @@ def get_runner_registration_token(access_token): return data["token"] -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": github_app_id, - } - - encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, installation_id) +def main(access_token, push_to_ssm, ssm_parameter_name): runner_registration_token = get_runner_registration_token(access_token) if push_to_ssm: - import boto3 - 
print("Trying to put params into ssm manager") client = boto3.client("ssm") client.put_parameter( @@ -94,8 +43,7 @@ def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): def handler(event, context): - private_key, app_id = get_key_and_app_from_aws() - main(private_key, app_id, True, "github_runner_registration_token") + main(get_cached_access_token(), True, "github_runner_registration_token") if __name__ == "__main__": @@ -140,4 +88,5 @@ if __name__ == "__main__": with open(args.private_key_path, "r") as key_file: private_key = key_file.read() - main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) + token = get_access_token_by_key_app(private_key, args.app_id) + main(token, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/runner_token_rotation_lambda/lambda_shared b/tests/ci/runner_token_rotation_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/runner_token_rotation_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/runner_token_rotation_lambda/requirements.txt b/tests/ci/runner_token_rotation_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/runner_token_rotation_lambda/requirements.txt +++ b/tests/ci/runner_token_rotation_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/team_keys_lambda/__init__.py b/tests/ci/team_keys_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index 870d41c441e..f562fbe101d 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -81,6 +81,8 @@ def get_cached_members_keys(members: set) -> Keys: def get_token_from_aws() -> str: + # We need a separate token, since the clickhouse-ci app does not have + # access to the organization members' endpoint secret_name = "clickhouse_robot_token" session = boto3.session.Session() client = session.client( @@ -130,4 +132,4 @@ if __name__ == "__main__": args = parser.parse_args() output = main(args.token, args.organization, args.team) - print(f"# Just shoing off the keys:\n{output}") + print(f"# Just showing off the keys:\n{output}") diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh index 4aee85c588a..02d5638cf18 100644 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -3,13 +3,19 @@ set -xeo pipefail WORKDIR=$(dirname "$0") WORKDIR=$(readlink -f "${WORKDIR}") +DIR_NAME=$(basename "$WORKDIR") cd "$WORKDIR" -PY_VERSION=3.10 +# Do not deploy the lambda to AWS +DRY_RUN=${DRY_RUN:-} +# Python runtime to install dependencies +PY_VERSION=${PY_VERSION:-3.10} PY_EXEC="python${PY_VERSION}" -DOCKER_IMAGE="python:${PY_VERSION}-slim" -LAMBDA_NAME=$(basename "$WORKDIR") -LAMBDA_NAME=${LAMBDA_NAME//_/-} +# Image to build the lambda zip package +DOCKER_IMAGE="public.ecr.aws/lambda/python:${PY_VERSION}" +# Rename the_lambda_name directory to the-lambda-name lambda in AWS +LAMBDA_NAME=${DIR_NAME//_/-} +# The name of directory with lambda code PACKAGE=lambda-package rm -rf "$PACKAGE" "$PACKAGE".zip mkdir "$PACKAGE" @@ -17,8 +23,9 @@ cp app.py "$PACKAGE" if [ -f requirements.txt ]; then VENV=lambda-venv rm -rf "$VENV" lambda-package.zip - docker run --rm --user="${UID}" 
--volume="${WORKDIR}:/lambda" --workdir="/lambda" "${DOCKER_IMAGE}" \ - /bin/bash -c " + docker run --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash \ + --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" \ + -exc " '$PY_EXEC' -m venv '$VENV' && source '$VENV/bin/activate' && pip install -r requirements.txt @@ -28,4 +35,6 @@ if [ -f requirements.txt ]; then fi ( cd "$PACKAGE" && zip -9 -r ../"$PACKAGE".zip . ) -aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$PACKAGE".zip +if [ -z "$DRY_RUN" ]; then + aws lambda update-function-code --function-name "$LAMBDA_NAME" --zip-file fileb://"$WORKDIR/$PACKAGE".zip +fi diff --git a/tests/ci/team_keys_lambda/lambda_shared b/tests/ci/team_keys_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/team_keys_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/team_keys_lambda/requirements.txt b/tests/ci/team_keys_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/team_keys_lambda/requirements.txt +++ b/tests/ci/team_keys_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package diff --git a/tests/ci/terminate_runner_lambda/__init__.py b/tests/ci/terminate_runner_lambda/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index c9192417575..98b14508314 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -4,132 +4,45 @@ import argparse import json import sys import time -from collections import namedtuple from dataclasses import dataclass -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List import boto3 # type: ignore -import requests # type: ignore -import jwt - -def get_key_and_app_from_aws() -> Tuple[str, int]: - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def get_installation_id(jwt_token: str) -> int: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - break - - return installation_id # type: ignore - - -def get_access_token(jwt_token: str, installation_id: int) -> str: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] # type: ignore +from lambda_shared import RunnerDescriptions, list_runners +from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @dataclass -class CachedToken: +class CachedInstances: time: int - value: str + value: dict + updating: bool = False -cached_token = CachedToken(0, "") +cached_instances = 
CachedInstances(0, {}) -def get_cached_access_token() -> str: - if time.time() - 500 < cached_token.time: - return cached_token.value - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - cached_token.time = int(time.time()) - cached_token.value = get_access_token(encoded_jwt, installation_id) - return cached_token.value - - -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] -) -RunnerDescriptions = List[RunnerDescription] - - -def list_runners(access_token: str) -> RunnerDescriptions: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - per_page = 100 - response = requests.get( - f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", - headers=headers, +def get_cached_instances() -> dict: + """return cached instances description with updating it once per five minutes""" + if time.time() - 250 < cached_instances.time or cached_instances.updating: + return cached_instances.value + # Indicate that the value is updating now, so the cached value can be + # used. The first setting and close-to-ttl are not counted as update + if cached_instances.time != 0 or time.time() - 300 < cached_instances.time: + cached_instances.updating = True + ec2_client = boto3.client("ec2") + instances_response = ec2_client.describe_instances( + Filters=[{"Name": "instance-state-name", "Values": ["running"]}] ) - response.raise_for_status() - data = response.json() - total_runners = data["total_count"] - print("Expected total runners", total_runners) - runners = data["runners"] - - # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 - total_pages = (total_runners - 1) // per_page + 1 - - print("Total pages", total_pages) - for i in range(2, total_pages + 1): - response = requests.get( - "https://api.github.com/orgs/ClickHouse/actions/runners" - f"?page={i}&per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - runners += data["runners"] - - print("Total runners", len(runners)) - result = [] - for runner in runners: - tags = [tag["name"] for tag in runner["labels"]] - desc = RunnerDescription( - id=runner["id"], - name=runner["name"], - tags=tags, - offline=runner["status"] == "offline", - busy=runner["busy"], - ) - result.append(desc) - - return result + cached_instances.time = int(time.time()) + cached_instances.value = { + instance["InstanceId"]: instance + for reservation in instances_response["Reservations"] + for instance in reservation["Instances"] + } + cached_instances.updating = False + return cached_instances.value def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: @@ -160,11 +73,37 @@ def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]: def main(access_token: str, event: dict) -> Dict[str, List[str]]: - print("Got event", json.dumps(event, sort_keys=True, indent=4)) + start = time.time() + print("Got event", json.dumps(event, sort_keys=True).replace("\n", "")) to_kill_by_zone = how_many_instances_to_kill(event) instances_by_zone = get_candidates_to_be_killed(event) + # Getting ASG and instances' descriptions from the API + # We don't kill instances that alive for less than 10 minutes, since they + # could be not in the GH active runners yet + print(f"Check other hosts 
from the same ASG {event['AutoScalingGroupName']}") + asg_client = boto3.client("autoscaling") + as_groups_response = asg_client.describe_auto_scaling_groups( + AutoScalingGroupNames=[event["AutoScalingGroupName"]] + ) + assert len(as_groups_response["AutoScalingGroups"]) == 1 + asg = as_groups_response["AutoScalingGroups"][0] + asg_instance_ids = [instance["InstanceId"] for instance in asg["Instances"]] + instance_descriptions = get_cached_instances() + # The instances launched less than 10 minutes ago + immune_ids = [ + instance["InstanceId"] + for instance in instance_descriptions.values() + if start - instance["LaunchTime"].timestamp() < 600 + ] + # if the ASG's instance ID not in instance_descriptions, it's most probably + # is not cached yet, so we must mark it as immuned + immune_ids.extend( + iid for iid in asg_instance_ids if iid not in instance_descriptions + ) + print("Time spent on the requests to AWS: ", time.time() - start) runners = list_runners(access_token) + runner_ids = set(runner.name for runner in runners) # We used to delete potential hosts to terminate from GitHub runners pool, # but the documentation states: # --- Returning an instance first in the response data does not guarantee its termination @@ -177,18 +116,23 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: total_to_kill += num_to_kill if num_to_kill > len(candidates): raise Exception( - f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}" + f"Required to kill {num_to_kill}, but have only {len(candidates)}" + f" candidates in AV {zone}" ) delete_for_av = [] # type: RunnerDescriptions for candidate in candidates: - if candidate not in set(runner.name for runner in runners): + if candidate in immune_ids: + print( + f"Candidate {candidate} started less than 10 minutes ago, won't touch a child" + ) + break + if candidate not in runner_ids: print( f"Candidate {candidate} was not in runners list, simply delete it" ) instances_to_kill.append(candidate) - - for candidate in candidates: + break if len(delete_for_av) + len(instances_to_kill) == num_to_kill: break if candidate in instances_to_kill: @@ -207,22 +151,18 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: if len(delete_for_av) < num_to_kill: print( - f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}" + f"Checked all candidates for av {zone}, get to delete " + f"{len(delete_for_av)}, but still cannot get required {num_to_kill}" ) instances_to_kill += [runner.name for runner in delete_for_av] if len(instances_to_kill) < total_to_kill: - print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}") - client = boto3.client("autoscaling") - as_groups = client.describe_auto_scaling_groups( - AutoScalingGroupNames=[event["AutoScalingGroupName"]] - ) - assert len(as_groups["AutoScalingGroups"]) == 1 - asg = as_groups["AutoScalingGroups"][0] - for instance in asg["Instances"]: + for instance in asg_instance_ids: + if instance in immune_ids: + continue for runner in runners: - if runner.name == instance["InstanceId"] and not runner.busy: + if runner.name == instance and not runner.busy: print(f"Runner {runner.name} is not busy and can be deleted") instances_to_kill.append(runner.name) @@ -230,9 +170,9 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: print("Got enough instances to kill") break - print("Got instances to kill: ", ", ".join(instances_to_kill)) response = {"InstanceIDs": 
instances_to_kill} - print(response) + print("Got instances to kill: ", response) + print("Time spent on the request: ", time.time() - start) return response @@ -270,6 +210,8 @@ if __name__ == "__main__": with open(args.private_key_path, "r") as key_file: private_key = key_file.read() + token = get_access_token_by_key_app(private_key, args.app_id) + sample_event = { "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", "AutoScalingGroupName": "my-asg", @@ -314,14 +256,4 @@ if __name__ == "__main__": "Cause": "SCALE_IN", } - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": args.app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - access_token = get_access_token(encoded_jwt, args.app_id) - - main(access_token, sample_event) + main(token, sample_event) diff --git a/tests/ci/terminate_runner_lambda/lambda_shared b/tests/ci/terminate_runner_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/terminate_runner_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/terminate_runner_lambda/requirements.txt b/tests/ci/terminate_runner_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/terminate_runner_lambda/requirements.txt +++ b/tests/ci/terminate_runner_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/workflow_approve_rerun_lambda/__init__.py b/tests/ci/workflow_approve_rerun_lambda/__init__.py deleted file mode 100644 index 4265cc3e6c1..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 32cba5d466b..3db62430d85 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -5,9 +5,10 @@ import fnmatch import json import time -import jwt import requests # type: ignore -import boto3 # type: ignore + +from lambda_shared.pr import TRUSTED_CONTRIBUTORS +from lambda_shared.token import get_cached_access_token SUSPICIOUS_CHANGED_FILES_NUMBER = 200 @@ -67,108 +68,6 @@ NEED_RERUN_WORKFLOWS = { "ReleaseBranchCI", } -# Individual trusted contirbutors who are not in any trusted organization. -# Can be changed in runtime: we will append users that we learned to be in -# a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = { - e.lower() - for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "amosbird", - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. 
- "bobrik", # Seasoned contributor, CloudFlare - "BohuTANG", - "codyrobert", # Flickerbox engineer - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "flickerbox-tom", # Flickerbox - "gyuton", # DOCSUP - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "taiyang-li", - "ucasFL", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse - "BoloniniD", # Seasoned contributor, HSE - "tonickkozlov", # Cloudflare - "tylerhannan", # ClickHouse Employee - "myrrc", # Mike Kot, DoubleCloud - "thevar1able", # ClickHouse Employee - "aalexfvk", - "MikhailBurdukov", - "tsolodov", # ClickHouse Employee - "kitaisreal", - ] -} - - -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - return installation_id - - -def get_access_token(jwt_token, installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] - - -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - def is_trusted_contributor(pr_user_login, pr_user_orgs): if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: @@ -331,19 +230,6 @@ def label_manual_approve(pull_request, token): _exec_post_with_retry(url, token, data) -def get_token_from_aws(): - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - return get_access_token(encoded_jwt, installation_id) - - def get_workflow_jobs(workflow_description, token): jobs_url = ( workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs" @@ -443,7 +329,7 @@ def check_workflow_completed( def main(event): - token = get_token_from_aws() + token = get_cached_access_token() event_data = json.loads(event["body"]) print("The body received:", event["body"]) workflow_description = get_workflow_description_from_event(event_data) diff --git 
a/tests/ci/workflow_approve_rerun_lambda/lambda_shared b/tests/ci/workflow_approve_rerun_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt index 98be09ab232..4cb3fba0f7b 100644 --- a/tests/ci/workflow_approve_rerun_lambda/requirements.txt +++ b/tests/ci/workflow_approve_rerun_lambda/requirements.txt @@ -1,3 +1 @@ -requests<2.30 -PyJWT -cryptography<38 +../lambda_shared_package[token] diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index bc8e1212be5..c624a492604 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -10,13 +10,11 @@ fields for private repositories from base64 import b64decode from dataclasses import dataclass -from typing import Any, List +from typing import Any, List, Optional import json import logging -import time -import boto3 # type: ignore -import requests # type: ignore +from lambda_shared import ClickHouseHelper, InsertException, get_parameter_from_ssm logging.getLogger().setLevel(logging.INFO) @@ -66,137 +64,7 @@ class WorkflowJob: return self.__dict__ -### VENDORING -def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] - - -class InsertException(Exception): - pass - - -class ClickHouseHelper: - def __init__(self, url=None): - if url is None: - url = get_parameter_from_ssm("clickhouse-test-stat-url") - - self.url = url - self.auth = { - "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), - "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), - } - - @staticmethod - def _insert_json_str_info_impl(url, auth, db, table, json_str): - params = { - "database": db, - "query": f"INSERT INTO {table} FORMAT JSONEachRow", - "date_time_input_format": "best_effort", - "send_logs_level": "warning", - } - - for i in range(5): - try: - response = requests.post( - url, params=params, data=json_str, headers=auth - ) - except Exception as e: - error = f"Received exception while sending data to {url} on {i} attempt: {e}" - logging.warning(error) - continue - - logging.info("Response content '%s'", response.content) - - if response.ok: - break - - error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" - ) - - if response.status_code >= 500: - # A retriable error - time.sleep(1) - continue - - logging.info( - "Request headers '%s', body '%s'", - response.request.headers, - response.request.body, - ) - - raise InsertException(error) - else: - raise InsertException(error) - - def _insert_json_str_info(self, db, table, json_str): - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - - def insert_event_into(self, db, table, event, safe=True): - event_str = json.dumps(event) - try: - self._insert_json_str_info(db, table, event_str) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def insert_events_into(self, db, table, events, safe=True): - jsons = [] - for event in events: - 
jsons.append(json.dumps(event)) - - try: - self._insert_json_str_info(db, table, ",".join(jsons)) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def _select_and_get_json_each_row(self, db, query): - params = { - "database": db, - "query": query, - "default_format": "JSONEachRow", - } - for i in range(5): - response = None - try: - response = requests.get(self.url, params=params, headers=self.auth) - response.raise_for_status() - return response.text - except Exception as ex: - logging.warning("Cannot insert with exception %s", str(ex)) - if response: - logging.warning("Reponse text %s", response.text) - time.sleep(0.1 * i) - - raise Exception("Cannot fetch data from clickhouse") - - def select_json_each_row(self, db, query): - text = self._select_and_get_json_each_row(db, query) - result = [] - for line in text.split("\n"): - if line: - result.append(json.loads(line)) - return result - - -### VENDORING END - -clickhouse_client = ClickHouseHelper() +CH_CLIENT = None # type: Optional[ClickHouseHelper] def send_event_workflow_job(workflow_job: WorkflowJob) -> None: @@ -232,23 +100,30 @@ def send_event_workflow_job(workflow_job: WorkflowJob) -> None: # PARTITION BY toStartOfMonth(started_at) # ORDER BY (id, updated_at) # SETTINGS index_granularity = 8192 - global clickhouse_client - kwargs = { - "db": "default", - "table": "workflow_jobs", - "event": workflow_job.as_dict(), - "safe": False, - } + global CH_CLIENT + CH_CLIENT = CH_CLIENT or ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), + get_parameter_from_ssm("clickhouse-test-stat-login"), + get_parameter_from_ssm("clickhouse-test-stat-password"), + ) try: - clickhouse_client.insert_event_into(**kwargs) + CH_CLIENT.insert_event_into( + "default", "workflow_jobs", workflow_job.as_dict(), False + ) except InsertException as ex: logging.exception( "Got an exception on insert, tryuing to update the client " "credentials and repeat", exc_info=ex, ) - clickhouse_client = ClickHouseHelper() - clickhouse_client.insert_event_into(**kwargs) + CH_CLIENT = ClickHouseHelper( + get_parameter_from_ssm("clickhouse-test-stat-url"), + get_parameter_from_ssm("clickhouse-test-stat-login"), + get_parameter_from_ssm("clickhouse-test-stat-password"), + ) + CH_CLIENT.insert_event_into( + "default", "workflow_jobs", workflow_job.as_dict(), False + ) def handler(event: dict, context: Any) -> dict: @@ -257,6 +132,7 @@ def handler(event: dict, context: Any) -> dict: else: event_data = json.loads(event["body"]) + logging.info("Got the next raw event from the github hook: %s", event_data) repo = event_data["repository"] try: wf_job = event_data["workflow_job"] @@ -265,6 +141,9 @@ def handler(event: dict, context: Any) -> dict: logging.error("The event data: %s", event) logging.error("The context data: %s", context) + # We record only finished steps + steps = len([step for step in wf_job["steps"] if step["conclusion"] is not None]) + workflow_job = WorkflowJob( wf_job["id"], wf_job["run_id"], @@ -281,7 +160,7 @@ def handler(event: dict, context: Any) -> dict: wf_job["started_at"], wf_job["completed_at"] or "1970-01-01T00:00:00", # nullable date wf_job["name"], - len(wf_job["steps"]), + steps, wf_job["check_run_url"], wf_job["labels"], wf_job["runner_id"] or 0, # nullable diff --git a/tests/ci/workflow_jobs_lambda/lambda_shared b/tests/ci/workflow_jobs_lambda/lambda_shared new file mode 120000 index 00000000000..ba86e090f6c --- /dev/null +++ 
b/tests/ci/workflow_jobs_lambda/lambda_shared @@ -0,0 +1 @@ +../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/workflow_jobs_lambda/requirements.txt b/tests/ci/workflow_jobs_lambda/requirements.txt index 3bcbe2dfd07..098e04a9798 100644 --- a/tests/ci/workflow_jobs_lambda/requirements.txt +++ b/tests/ci/workflow_jobs_lambda/requirements.txt @@ -1 +1 @@ -requests<2.30 +../lambda_shared_package diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index 15239041478..96d59d5633e 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -12,6 +12,7 @@ s3 100000000 ./cache_s3/ + 1 diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml new file mode 100644 index 00000000000..5de169edc1e --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/users.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index eca18820016..72a01d278d8 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -19,6 +19,7 @@ def cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "1"}, with_minio=True, with_zookeeper=True, @@ -26,12 +27,14 @@ def cluster(): cluster.add_instance( "node2", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "2"}, with_zookeeper=True, ) cluster.add_instance( "node3", main_configs=["configs/config.d/storage_conf.xml"], + user_configs=["configs/config.d/users.xml"], macros={"replica": "3"}, with_zookeeper=True, ) @@ -74,7 +77,7 @@ def generate_values(date_str, count, sign=1): def create_table(cluster, additional_settings=None): create_table_statement = """ - CREATE TABLE s3_test ON CLUSTER cluster( + CREATE TABLE s3_test ON CLUSTER cluster ( dt Date, id Int64, data String, @@ -95,7 +98,8 @@ def create_table(cluster, additional_settings=None): def drop_table(cluster): yield for node in list(cluster.instances.values()): - node.query("DROP TABLE IF EXISTS s3_test") + node.query("DROP TABLE IF EXISTS s3_test SYNC") + node.query("DROP TABLE IF EXISTS test_drop_table SYNC") minio = cluster.minio_client # Remove extra objects to prevent tests cascade failing diff --git a/tests/queries/0_stateless/00988_parallel_parts_removal.sql b/tests/queries/0_stateless/00988_parallel_parts_removal.sql index bff9bbe6d8d..8f79276782b 100644 --- a/tests/queries/0_stateless/00988_parallel_parts_removal.sql +++ b/tests/queries/0_stateless/00988_parallel_parts_removal.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS mt; -CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS max_part_removal_threads = 16, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; +CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS 
cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; SYSTEM STOP MERGES mt; diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 13cd56e1924..a05515cf756 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS mt; -CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS max_part_loading_threads = 16, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; +CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; SYSTEM STOP MERGES mt; diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index 42ae5e84f44..db62dedb5b4 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-ubsan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index f8f49816479..87153a4bd58 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -11,6 +11,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# The number of threads removing data parts should be between 1 and 129. +# Because max_parts_cleaning_thread_pool_size is 128 by default + $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -nm -q "create database ordinary_$CLICKHOUSE_DATABASE engine=Ordinary" # MergeTree @@ -22,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm -q """ Engine=MergeTree() order by key partition by key%100 - settings max_part_removal_threads=10, concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; + settings concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; insert into data_01810 select * from numbers(100); drop table data_01810 settings log_queries=1; @@ -30,7 +33,7 @@ $CLICKHOUSE_CLIENT -nm -q """ -- sometimes the same thread can be used to remove part, due to ThreadPool, -- hence we cannot compare strictly. - select throwIf(not(length(thread_ids) between 1 and 11)) + select throwIf(not(length(thread_ids) between 1 and 129)) from system.query_log where event_date >= yesterday() and @@ -49,7 +52,7 @@ $CLICKHOUSE_CLIENT -nm -q """ Engine=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rep_data_01810', '1') order by key partition by key%100 - settings max_part_removal_threads=10, concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; + settings concurrent_part_removal_threshold=99, min_bytes_for_wide_part=0; SET insert_keeper_max_retries=1000; SET insert_keeper_retry_max_backoff_ms=10; @@ -60,7 +63,7 @@ $CLICKHOUSE_CLIENT -nm -q """ -- sometimes the same thread can be used to remove part, due to ThreadPool, -- hence we cannot compare strictly. 
- select throwIf(not(length(thread_ids) between 1 and 11)) + select throwIf(not(length(thread_ids) between 1 and 129)) from system.query_log where event_date >= yesterday() and diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 88fb2cdf9b1..5b9342972f4 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -8,7 +8,7 @@ drop table if exists rmt2; -- Disable compact parts, because we need hardlinks in mutations. create table rmt (n int, m int, k int) engine=ReplicatedMergeTree('/test/02432/{database}', '1') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into rmt(n, m) values (1, 42); @@ -38,7 +38,7 @@ select count(), sum(n), sum(m) from rmt; -- New table can assign merges/mutations and can remove old parts create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; @@ -66,4 +66,3 @@ drop table rmt2; system flush logs; select count() > 0 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts from filesystem (concurrently): Parts:%'; select count() > 1, countDistinct(thread_id) > 1 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts in blocks range%'; - diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference index 437012dd516..79871e3716c 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.reference @@ -11,3 +11,19 @@ (['ClickHouse Documentation','ClickHouse'],[0,1],['/en'],['ClickHouse']) (['Documentation','GitHub'],[2,3],[NULL],[]) (['Documentation','GitHub'],[2,3],[NULL],[]) +ClickHouse +['ClickHouse'] +ClickHouse Documentation +['ClickHouse Documentation','ClickHouse','Documentation'] +GitHub Documentation +['GitHub Documentation','GitHub'] +Documentation +['Documentation'] +ClickHouse +['ClickHouse'] +ClickHouse Documentation +['ClickHouse Documentation','ClickHouse','Documentation'] +GitHub Documentation +['GitHub Documentation','GitHub'] +Documentation +['Documentation'] diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh index ac0793460a9..5e8985406ae 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh +++ 
b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh @@ -175,6 +175,70 @@ select dictGetAll('regexp_dict3', ('tag', 'topological_index', 'captured', 'pare select dictGetAll('regexp_dict3', ('tag', 'topological_index', 'captured', 'parent'), 'github.com/clickhouse/tree/master/docs', 2); " +# Test that things work the same for "simple" regexps that go through Hyperscan and "complex" regexps that go through RE2. +# An easy way to force the use of RE2 is to disable Hyperscan. +# This tree is constructed purposely so that text might (falsely) match leaf nodes without matching their corresponding parent nodes +cat > "$yaml" <= yesterday() and initial_query_id LIKE '$1%' + GROUP BY initial_query_id + ORDER BY min(event_time_microseconds) ASC + FORMAT TSV" +} + +function run_query_with_pure_parallel_replicas () { + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_disabled" \ + --max_parallel_replicas 0 + + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_pure" \ + --max_parallel_replicas 3 \ + --prefer_localhost_replica 1 \ + --use_hedged_requests 0 \ + --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \ + --allow_experimental_parallel_reading_from_replicas 1 \ + --allow_experimental_analyzer 0 + + # Not implemented yet + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_pure_analyzer" \ + --max_parallel_replicas 3 \ + --prefer_localhost_replica 1 \ + --use_hedged_requests 0 \ + --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \ + --allow_experimental_parallel_reading_from_replicas 1 \ + --allow_experimental_analyzer 1 +} + +function run_query_with_custom_key_parallel_replicas () { + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_disabled" \ + --max_parallel_replicas 0 + + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_custom_key" \ + --max_parallel_replicas 3 \ + --use_hedged_requests 0 \ + --parallel_replicas_custom_key_filter_type 'default' \ + --parallel_replicas_custom_key "$2" \ + --allow_experimental_analyzer 0 + + $CLICKHOUSE_CLIENT \ + --query "$2" \ + --query_id "${1}_custom_key_analyzer" \ + --max_parallel_replicas 3 \ + --use_hedged_requests 0 \ + --parallel_replicas_custom_key_filter_type 'default' \ + --parallel_replicas_custom_key "$2" \ + --allow_experimental_analyzer 1 +} + +$CLICKHOUSE_CLIENT --query " + CREATE TABLE replicated_numbers + ( + number Int64, + ) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/replicated_numbers', 'r1') + ORDER BY (number) + AS SELECT number FROM numbers(100000); +" + +query_id_base="02783_count-$CLICKHOUSE_DATABASE" + +run_query_with_pure_parallel_replicas "${query_id_base}_0" "SELECT count() FROM replicated_numbers" +run_query_with_pure_parallel_replicas "${query_id_base}_1" "SELECT * FROM (SELECT count() FROM replicated_numbers) LIMIT 20" + +# Not implemented yet as the query fails to execute correctly to begin with +#run_query_with_custom_key_parallel_replicas "${query_id_base}_2" "SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), replicated_numbers)" "sipHash64(number)" +#run_query_with_custom_key_parallel_replicas "${query_id_base}_3" "SELECT * FROM (SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), replicated_numbers)) LIMIT 20" "sipHash64(number)" + + +$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" +has_used_parallel_replicas "${query_id_base}" diff --git a/tests/queries/0_stateless/data_avro/decimals.avro 
b/tests/queries/0_stateless/data_avro/decimals.avro new file mode 100644 index 00000000000..5c29ac235d5 Binary files /dev/null and b/tests/queries/0_stateless/data_avro/decimals.avro differ
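
The new parallel-replicas test above drives everything through `clickhouse-client` flags. The same "pure" run can be reproduced interactively with a `SETTINGS` clause; the sketch below reuses the `replicated_numbers` table and the `test_cluster_one_shard_three_replicas_localhost` cluster from the test, but the `SETTINGS`-clause spelling of the flags is my own rendering, not part of the patch:

```sql
-- Mirrors the "_pure" invocation in run_query_with_pure_parallel_replicas:
-- three replicas, hedged requests off, reads fanned out over the localhost test cluster.
SELECT count()
FROM replicated_numbers
SETTINGS
    max_parallel_replicas = 3,
    prefer_localhost_replica = 1,
    use_hedged_requests = 0,
    cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost',
    allow_experimental_parallel_reading_from_replicas = 1;
```

The "_pure_analyzer" variant in the script differs only in flipping `allow_experimental_analyzer`, which is what the `query_log` check at the top of the script later distinguishes per `initial_query_id`.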
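
The comment added to the regexp-dictionary test notes that disabling Hyperscan is an easy way to force the RE2 path. A sketch of what that comparison could look like outside the test harness, assuming `regexp_dict_allow_hyperscan` is the setting the test toggles (the setting name is my assumption; the dictionary name and query shape are taken verbatim from the test above):

```sql
-- Same lookup twice: once on the default (Hyperscan) path, once with Hyperscan
-- disabled so the dictionary falls back to RE2. Both should return identical results.
SELECT dictGetAll('regexp_dict3',
                  ('tag', 'topological_index', 'captured', 'parent'),
                  'github.com/clickhouse/tree/master/docs', 2);

SELECT dictGetAll('regexp_dict3',
                  ('tag', 'topological_index', 'captured', 'parent'),
                  'github.com/clickhouse/tree/master/docs', 2)
SETTINGS regexp_dict_allow_hyperscan = 0;  -- assumed setting name
```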