From 640d87b04f72f37bd03a41674470866c3f544f95 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 27 Jul 2022 16:21:56 +0000 Subject: [PATCH 001/137] add more settings for randomization --- tests/clickhouse-test | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 952fc7fb0a9..c69ff7d0721 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -395,6 +395,16 @@ class FailureReason(enum.Enum): INTERNAL_ERROR = "Test internal error: " +def default_generator_for_bytes_setting(): + return ( + lambda: 0 + if random.random() < 0.5 + else 1 + if random.random() < 0.2 + else random.randint(1, 10 * 1024 * 1024 * 1024) + ) + + class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -427,16 +437,8 @@ class SettingsRandomizer: "optimize_aggregation_in_order": lambda: random.randint(0, 1), "aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000), "use_uncompressed_cache": lambda: random.randint(0, 1), - "min_bytes_to_use_direct_io": lambda: 0 - if random.random() < 0.5 - else 1 - if random.random() < 0.2 - else random.randint(1, 1024 * 1024 * 1024), - "min_bytes_to_use_mmap_io": lambda: 0 - if random.random() < 0.5 - else 1 - if random.random() < 0.2 - else random.randint(1, 1024 * 1024 * 1024), + "min_bytes_to_use_direct_io": default_generator_for_bytes_setting(), + "min_bytes_to_use_mmap_io": default_generator_for_bytes_setting(), "local_filesystem_read_method": lambda: random.choice( ["read", "pread", "mmap", "pread_threadpool"] ), @@ -448,6 +450,9 @@ class SettingsRandomizer: "compile_sort_description": lambda: random.randint(0, 1), "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), + "max_bytes_before_external_sort": default_generator_for_bytes_setting(), + "max_bytes_before_external_group_by": default_generator_for_bytes_setting(), + "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), } @staticmethod From 3f53477be7e468886e591a6c48c7483541174bfa Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 20 Mar 2023 14:44:11 +0000 Subject: [PATCH 002/137] remove unused code --- tests/clickhouse-test | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 10600491b4a..e37c3900b9c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -491,16 +491,6 @@ class FailureReason(enum.Enum): INTERNAL_ERROR = "Test internal error: " -def default_generator_for_bytes_setting(): - return ( - lambda: 0 - if random.random() < 0.5 - else 1 - if random.random() < 0.2 - else random.randint(1, 10 * 1024 * 1024 * 1024) - ) - - def threshold_generator(always_on_prob, always_off_prob, min_val, max_val): def gen(): tmp = random.random() From d2360302cf52dd7cc556b297b87784d01044b83a Mon Sep 17 00:00:00 2001 From: guoxiaolong <467887319@qq.com> Date: Tue, 17 Oct 2023 15:33:21 +0800 Subject: [PATCH 003/137] clickhouse-test print nowTime in one test case. 
Because the end time of all test cases is long, adding a specific test case time to print can observe the specific execution status and progress --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cab7d7e79ff..4ea8129f0c1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1751,7 +1751,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " if not is_concurrent: sys.stdout.flush() - sys.stdout.write(f"{test_cace_name:72}") + sys.stdout.write(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {test_cace_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. From c552a912608ff213c331383da2805fcb245621de Mon Sep 17 00:00:00 2001 From: guoxiaolong <467887319@qq.com> Date: Tue, 17 Oct 2023 19:27:42 +0800 Subject: [PATCH 004/137] clickhouse-test case No --- tests/clickhouse-test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4ea8129f0c1..f70f8fb8a10 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1724,6 +1724,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): proc_name = multiprocessing.current_process().name print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") + seria_num=1 while True: if is_concurrent: case = queue.get(timeout=args.timeout * 1.1) @@ -1751,7 +1752,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " if not is_concurrent: sys.stdout.flush() - sys.stdout.write(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {test_cace_name:72}") + sys.stdout.write(f"Case SN: {seria_num} | Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Case Name: {test_cace_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. 
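For illustration only (not part of the diff — the serial number, timestamp and test name below are invented, and the name is padded to a width of 72 characters by the format string), a line printed by the test runner now begins like this:

``` text
Case SN: 1 | Current Time: 2023-10-18 10:12:42 | Case Name: 00001_select_1:
```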
@@ -1797,6 +1798,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): if failures_chain >= args.max_failures_chain: stop_tests() break + seria_num += 1 if failures_total > 0: print( From edcda1ed6d199912ff5fe50f04caa595b8acc61c Mon Sep 17 00:00:00 2001 From: guoxiaolong <467887319@qq.com> Date: Wed, 18 Oct 2023 10:12:42 +0800 Subject: [PATCH 005/137] fix checkstyle --- tests/clickhouse-test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f70f8fb8a10..38f79a8c0b2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1724,7 +1724,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): proc_name = multiprocessing.current_process().name print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") - seria_num=1 + seria_num = 1 while True: if is_concurrent: case = queue.get(timeout=args.timeout * 1.1) @@ -1752,7 +1752,9 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " if not is_concurrent: sys.stdout.flush() - sys.stdout.write(f"Case SN: {seria_num} | Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Case Name: {test_cace_name:72}") + sys.stdout.write( + f"Case SN: {seria_num} | Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Case Name: {test_cace_name:72}" + ) # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. From c59d7003924bdf2f2769db0b04558ecfe301b5a2 Mon Sep 17 00:00:00 2001 From: Sergey Suvorov Date: Mon, 30 Oct 2023 20:54:51 +0100 Subject: [PATCH 006/137] Add documentation for minSampleSizeConversion and minSampleSizeContinous --- .../functions/other-functions.md | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 6b092cf384d..db231166b74 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2823,3 +2823,90 @@ Result: │ SELECT a, b FROM tab WHERE (a > 3) AND (b < 3) │ └─────────────────────────────────────────────────────────────────────────┘ ``` + +## minSampleSizeConversion + +Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. + +**Syntax** + +``` sql +minSampleSizeConversion(baseline, mde, power, alpha) +``` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the sample size required for one group (i.e. the sample size required for the whole experiment is twice the returned value). + +**Arguments** + +- `baseline` — Baseline conversion. [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage points (e.g. for a baseline conversion 0.25 the MDE 0.03 means an expected change to 0.25 ± 0.03). [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). [Float](../data-types/float.md). 
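For readers who do not want to follow the link, here is a sketch of the calculation. This is the standard two-proportion sample-size formula and is stated as an assumption; it is not quoted from the linked article or from the ClickHouse implementation:

$$
n \approx \frac{\left(z_{1-\alpha/2}\,\sqrt{2\,\bar p\,(1-\bar p)} \;+\; z_{1-\beta}\,\sqrt{p_1(1-p_1)+p_2(1-p_2)}\right)^{2}}{(p_2-p_1)^{2}},
\qquad p_1 = \text{baseline},\quad p_2 = \text{baseline}+\text{mde},\quad \bar p = \frac{p_1+p_2}{2}.
$$

With `baseline = 0.25`, `mde = 0.03`, `power = 0.8` (\(z_{1-\beta} \approx 0.84\)) and `alpha = 0.05` (\(z_{1-\alpha/2} \approx 1.96\)) this evaluates to roughly 3396, which agrees with the example below up to rounding.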
+ +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline - mde`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline + mde`. [Float64](../data-types/float.md). + +**Example** + +The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────┐ +│ (3396.077603219163,0.22,0.28) │ +└───────────────────────────────┘ +``` + +## minSampleSizeContinous + +Calculates minimum required sample size for an A/B test comparing means of a continuous metric in two samples. + +**Syntax** + +``` sql +minSampleSizeContinous(baseline, sigma, mde, power, alpha) +``` + +Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the required sample size for one group (i.e. the sample size required for the whole experiment is twice the returned value). Also assumes equal variance of the test metric in treatment and control groups. + +**Arguments** + +- `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). +- `alpha` — Required significance level of a test (probability of Type I error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md). + +**Returned value** + +A named [Tuple](../data-types/tuple.md) with 3 elements: + +- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md). +- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 - mde)`. [Float64](../data-types/float.md). +- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 + mde)`. [Float64](../data-types/float.md). 
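Analogously, a sketch of the calculation for the continuous case (again an assumption based on the standard two-sample power analysis, not a quotation from the linked article):

$$
n \approx \frac{2\,\sigma^{2}\,\left(z_{1-\alpha/2} + z_{1-\beta}\right)^{2}}{(\text{baseline}\cdot\text{mde})^{2}}.
$$

With `baseline = 112.25`, `sigma = 21.1`, `mde = 0.03`, `power = 0.8` and `alpha = 0.05` this gives \(n \approx 616.3\), matching the example below.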
+ +**Example** + +The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%: + +``` sql +SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size; +``` + +Result: + +``` text +┌─sample_size───────────────────────────┐ +│ (616.2931945826209,108.8825,115.6175) │ +└───────────────────────────────────────┘ +``` From 37843b0c285018c2500a50a5d4eff878271de2a8 Mon Sep 17 00:00:00 2001 From: Sergey Suvorov Date: Tue, 31 Oct 2023 13:05:41 +0100 Subject: [PATCH 007/137] Add alias for minSampleSizeContinuous after typo fix in #56143 --- docs/en/sql-reference/functions/other-functions.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index db231166b74..202100c3832 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2867,7 +2867,7 @@ Result: └───────────────────────────────┘ ``` -## minSampleSizeContinous +## minSampleSizeContinuous Calculates minimum required sample size for an A/B test comparing means of a continuous metric in two samples. @@ -2877,6 +2877,8 @@ Calculates minimum required sample size for an A/B test comparing means of a con minSampleSizeContinous(baseline, sigma, mde, power, alpha) ``` +Alias: `minSampleSizeContinous` + Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the required sample size for one group (i.e. the sample size required for the whole experiment is twice the returned value). Also assumes equal variance of the test metric in treatment and control groups. 
**Arguments** From 0a5888b6f852435f438f4fc9f389dabe375a8b28 Mon Sep 17 00:00:00 2001 From: Sergey Suvorov Date: Tue, 31 Oct 2023 17:04:37 +0100 Subject: [PATCH 008/137] Add new function names to aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6c4edc37e24..39b637238c0 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1821,6 +1821,8 @@ mininum miniselect minmap minmax +minSampleSizeContinuous +minSampleSizeConversion mins misconfiguration mispredictions From 89fa729235106511423c8fde75a328f907f8cf1f Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Fri, 24 Nov 2023 12:45:59 +0800 Subject: [PATCH 009/137] show uncompressed size in system.tables --- src/Storages/IStorage.h | 7 +++++++ src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 ++ src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +++ src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp | 9 +++++++++ src/Storages/MergeTree/MergeTreeDataPartChecksum.h | 1 + src/Storages/MergeTree/MergedBlockOutputStream.cpp | 1 + src/Storages/StorageMaterializedView.cpp | 10 ++++++++++ src/Storages/StorageMaterializedView.h | 1 + src/Storages/StorageMergeTree.cpp | 9 +++++++++ src/Storages/StorageMergeTree.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 6 ++++++ src/Storages/StorageReplicatedMergeTree.h | 2 ++ src/Storages/System/StorageSystemTables.cpp | 10 ++++++++++ 13 files changed, 62 insertions(+) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e70e9a61062..26ad859983d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -689,6 +689,13 @@ public: /// when considering in-memory blocks. virtual std::optional totalBytes(const Settings &) const { return {}; } + /// If it is possible to quickly determine exact number of uncompressed bytes for the table on storage: + /// - disk (uncompressed) + /// + /// Used for: + /// - For total_bytes_uncompressed column in system.tables + virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } + /// Number of rows INSERTed since server start. /// /// Does not take the underlying Storage (if any) into account. 
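As a usage illustration for the column introduced by this patch (not part of the diff; the database filter, the engine filter and the rounding are arbitrary choices), the uncompressed size can be compared with the compressed size directly from `system.tables`:

``` sql
SELECT
    name,
    total_bytes,
    total_bytes_uncompressed,
    round(total_bytes_uncompressed / total_bytes, 2) AS compression_ratio
FROM system.tables
WHERE database = currentDatabase()
  AND engine LIKE '%MergeTree%';
```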
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9bc72577b25..2b71faa4cd9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1116,6 +1116,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) { assertEOF(*buf); bytes_on_disk = checksums.getTotalSizeOnDisk(); + bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk(); } else bytes_on_disk = getDataPartStorage().calculateTotalSizeOnDisk(); @@ -1133,6 +1134,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); + bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk(); } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a9659d2f5f4..03d81a570d6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -364,7 +364,9 @@ public: UInt64 getIndexSizeFromFile() const; UInt64 getBytesOnDisk() const { return bytes_on_disk; } + UInt64 getBytesUncompressedOnDisk() const { return bytes_uncompressed_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } + void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; } size_t getFileSizeOrZero(const String & file_name) const; @@ -554,6 +556,7 @@ protected: /// Total size on disk, not only columns. May not contain size of /// checksums.txt and columns.txt. 0 - if not counted; UInt64 bytes_on_disk{0}; + UInt64 bytes_uncompressed_on_disk{0}; /// Columns description. Cannot be changed, after part initialization. NamesAndTypesList columns; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index ed2202fcb19..c5d3865a72d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -1,5 +1,6 @@ #include "MergeTreeDataPartChecksum.h" #include +#include "base/types.h" #include #include #include @@ -100,6 +101,14 @@ UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const return res; } +UInt64 MergeTreeDataPartChecksums::getTotalSizeUncompressedOnDisk() const +{ + UInt64 res = 0; + for (const auto & [_, checksum] : files) + res += checksum.uncompressed_size; + return res; +} + bool MergeTreeDataPartChecksums::read(ReadBuffer & in, size_t format_version) { switch (format_version) diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 8e5e8c8c448..837b940e354 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -88,6 +88,7 @@ struct MergeTreeDataPartChecksums static MergeTreeDataPartChecksums deserializeFrom(const String & s); UInt64 getTotalSizeOnDisk() const; + UInt64 getTotalSizeUncompressedOnDisk() const; }; /// A kind of MergeTreeDataPartChecksums intended to be stored in ZooKeeper (to save its RAM) diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index adea78429c4..cf3ef716eb1 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -182,6 +182,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index = 
writer->releaseIndexColumns(); new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); + new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 97cfd550769..c5941322f20 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -477,6 +477,16 @@ std::optional StorageMaterializedView::totalBytes(const Settings & setti return {}; } +std::optional StorageMaterializedView::totalBytesUncompressed(const Settings & settings) const +{ + if (hasInnerTable()) + { + if (auto table = tryGetTargetTable()) + return table->totalBytesUncompressed(settings); + } + return {}; +} + ActionLock StorageMaterializedView::getActionLock(StorageActionBlockType type) { if (has_inner_table) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index ae38cfb7e59..55843197d50 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -103,6 +103,7 @@ public: std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; + std::optional totalBytesUncompressed(const Settings & settings) const override; private: /// Will be initialized in constructor diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e9a0dd5fbf3..d9aa7d261cc 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -271,6 +271,15 @@ std::optional StorageMergeTree::totalBytes(const Settings &) const return getTotalActiveSizeInBytes(); } +std::optional StorageMergeTree::totalBytesUncompressed(const Settings &) const +{ + UInt64 res = 0; + auto parts = getDataPartsForInternalUsage(); + for (const auto & part : parts) + res += part->getBytesUncompressedOnDisk(); + return res; +} + SinkToStoragePtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 539037a90ae..a0d0a2519a0 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -70,6 +70,7 @@ public: std::optional totalRows(const Settings &) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const override; std::optional totalBytes(const Settings &) const override; + std::optional totalBytesUncompressed(const Settings &) const override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f841ca21938..576e6fde1c8 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5459,6 +5459,12 @@ std::optional StorageReplicatedMergeTree::totalBytes(const Settings & se return res; } +std::optional StorageReplicatedMergeTree::totalBytesUncompressed(const Settings & settings) const +{ + UInt64 res = 0; + foreachActiveParts([&res](auto & part) { res += part->getBytesUncompressedOnDisk(); }, settings.select_sequential_consistency); + return res; +} void 
StorageReplicatedMergeTree::assertNotReadonly() const { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index b2a67572adc..a1d222db57a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -165,6 +166,7 @@ public: std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context) const override; std::optional totalBytes(const Settings & settings) const override; + std::optional totalBytesUncompressed(const Settings & settings) const override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d888813f6ce..267fbfc55e9 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -51,6 +51,7 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"storage_policy", std::make_shared()}, {"total_rows", std::make_shared(std::make_shared())}, {"total_bytes", std::make_shared(std::make_shared())}, + {"total_bytes_uncompressed", std::make_shared(std::make_shared())}, {"parts", std::make_shared(std::make_shared())}, {"active_parts", std::make_shared(std::make_shared())}, {"total_marks", std::make_shared(std::make_shared())}, @@ -488,6 +489,15 @@ protected: res_columns[res_index++]->insertDefault(); } + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + auto table_merge_tree = std::dynamic_pointer_cast(table); if (columns_mask[src_index++]) { From 2b35e98ef856258fbc0700926e9918882b8825b4 Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Fri, 24 Nov 2023 16:20:11 +0800 Subject: [PATCH 010/137] add comments and doc --- docs/en/operations/system-tables/tables.md | 2 ++ src/Storages/IStorage.h | 2 ++ src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp | 1 - src/Storages/StorageReplicatedMergeTree.h | 1 - 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index e4461e14236..231be6404a3 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -57,6 +57,8 @@ Columns: - If the table stores data on disk, returns used space on disk (i.e. compressed). - If the table stores data in memory, returns approximated number of used bytes in memory. +- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it is possible to quickly determine exact number of bytes from checksums for the table on storage, otherwise `NULL` (does not includes any underlying storage). + - `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). 
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 26ad859983d..6bae60bbfd6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -694,6 +694,8 @@ public: /// /// Used for: /// - For total_bytes_uncompressed column in system.tables + /// + /// Does not takes underlying Storage (if any) into account virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } /// Number of rows INSERTed since server start. diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index c5d3865a72d..ef7d38df4b2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -1,6 +1,5 @@ #include "MergeTreeDataPartChecksum.h" #include -#include "base/types.h" #include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index a1d222db57a..df68572e1e4 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include From 51e3899f73c0581a4fe19c09dd69b8f93c86d36a Mon Sep 17 00:00:00 2001 From: Tim Windelschmidt Date: Wed, 13 Sep 2023 18:47:33 +0200 Subject: [PATCH 011/137] Add setting http_make_head_request Clickhouse always does a HEAD request before actually executing the GET request, this adds a settings to skip that request. --- docs/en/operations/settings/settings.md | 11 +++++++++++ src/Core/Settings.h | 1 + src/IO/ReadSettings.h | 1 + src/IO/ReadWriteBufferFromHTTP.cpp | 5 +++++ src/Interpreters/Context.cpp | 1 + 5 files changed, 19 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index edc1c9bdfd7..39542acdaae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -460,6 +460,17 @@ Possible values: Default value: 1048576. +## http_make_head_request {#http-make-head-request} + +Enables or disables execution of a HEAD request before the actual GET request + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + ## table_function_remote_max_addresses {#table_function_remote_max_addresses} Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function. 
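For illustration (not part of the diff; the URL is a placeholder), the new setting can be disabled per query when the remote server is known not to support `HEAD` requests:

``` sql
SELECT count()
FROM url('https://example.com/data.csv', CSVWithNames)
SETTINGS http_make_head_request = 0;
```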
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 448656aa435..c29a4ef3750 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -333,6 +333,7 @@ class IColumn; M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP header", 0) \ M(UInt64, http_max_chunk_size, 100_GiB, "Maximum value of a chunk size in HTTP chunked transfer encoding", 0) \ M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ + M(Bool, http_make_head_request, true, "If it is set to true, execute a HEAD request before the actual GET request", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ M(UInt64, use_index_for_in_with_subqueries_max_values, 0, "The maximum size of set in the right hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to preparation of additional data structures for large queries. Zero means no limit.", 0) \ diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 4c8a6cb020a..a8a31d82e56 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -120,6 +120,7 @@ struct ReadSettings size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; bool http_skip_not_found_url_for_globs = true; + bool http_make_head_request = true; /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6dd6269e16f..d54e60e9950 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -808,6 +808,11 @@ std::optional ReadWriteBufferFromHTTPBase::tryGetLa template HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() { + if (!settings.http_make_head_request) + { + return HTTPFileInfo{}; + } + Poco::Net::HTTPResponse response; try { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 185f9782da5..ba45e7c064e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4843,6 +4843,7 @@ ReadSettings Context::getReadSettings() const res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; + res.http_make_head_request = settings.http_make_head_request; res.mmap_cache = getMMappedFileCache().get(); From 1044e3366b392fd233634ddd85588c8a538e05ad Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 24 Nov 2023 10:29:01 +0100 Subject: [PATCH 012/137] Update http_make_head_request doc --- docs/en/operations/settings/settings.md | 9 ++------- src/Core/Settings.h | 2 +- src/IO/ReadWriteBufferFromHTTP.cpp | 6 +++--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 39542acdaae..4bbac98a579 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -462,14 +462,9 @@ Default value: 1048576. 
## http_make_head_request {#http-make-head-request} -Enables or disables execution of a HEAD request before the actual GET request +The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests. -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. +Default value: `true`. ## table_function_remote_max_addresses {#table_function_remote_max_addresses} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c29a4ef3750..03d5853261c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -333,7 +333,7 @@ class IColumn; M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP header", 0) \ M(UInt64, http_max_chunk_size, 100_GiB, "Maximum value of a chunk size in HTTP chunked transfer encoding", 0) \ M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ - M(Bool, http_make_head_request, true, "If it is set to true, execute a HEAD request before the actual GET request", 0) \ + M(Bool, http_make_head_request, true, "Allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ M(UInt64, use_index_for_in_with_subqueries_max_values, 0, "The maximum size of set in the right hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to preparation of additional data structures for large queries. Zero means no limit.", 0) \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index d54e60e9950..96f05889882 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -808,10 +808,10 @@ std::optional ReadWriteBufferFromHTTPBase::tryGetLa template HTTPFileInfo ReadWriteBufferFromHTTPBase::getFileInfo() { + /// May be disabled in case the user knows in advance that the server doesn't support HEAD requests. + /// Allows to avoid making unnecessary requests in such cases. 
if (!settings.http_make_head_request) - { - return HTTPFileInfo{}; - } + return HTTPFileInfo{}; Poco::Net::HTTPResponse response; try From 3f8cfa0060cedf03dfa94dee5e0c5465d5df5a29 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 24 Nov 2023 09:35:48 +0000 Subject: [PATCH 013/137] Add http_make_head_request function to SettingsRandomizer --- tests/clickhouse-test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4b551ed3663..60416521ed1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -631,6 +631,7 @@ class SettingsRandomizer: get_localzone(), ] ), + "http_make_head_request": lambda: random.randint(0, 1), } @staticmethod From 67f0a0e3e44649c8ce46ec31e1fda329b9c132e6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 28 Nov 2023 23:57:55 +0100 Subject: [PATCH 014/137] don't throw if noop when dropping db replica in batch --- src/Databases/DatabaseReplicated.cpp | 6 +++++- src/Databases/DatabaseReplicated.h | 2 +- src/Interpreters/InterpreterSystemQuery.cpp | 6 +++--- tests/queries/0_stateless/02447_drop_database_replica.sh | 8 ++++++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 5da20c42465..0ebc5aa6023 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1175,7 +1175,7 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node } void DatabaseReplicated::dropReplica( - DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica) + DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop) { assert(!database || database_zookeeper_path == database->zookeeper_path); @@ -1192,8 +1192,12 @@ void DatabaseReplicated::dropReplica( String database_replica_path = fs::path(database_zookeeper_path) / "replicas" / full_replica_name; if (!zookeeper->exists(database_replica_path)) + { + if (!throw_if_noop) + return; throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} does not exist (database path: {})", full_replica_name, database_zookeeper_path); + } if (zookeeper->exists(database_replica_path + "/active")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} is active, cannot drop it (database path: {})", diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 1387ba1cb96..5a90cf9c8b7 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -81,7 +81,7 @@ public: bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override; - static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica); + static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop); std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9c3db6cfdbd..6623ece8d6d 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -954,7 +954,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) if (auto * replicated = dynamic_cast(database.get())) { check_not_local_replica(replicated, 
query); - DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica, /*throw_if_noop*/ true); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database {} is not Replicated, cannot drop replica", query.getDatabase()); @@ -979,7 +979,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) } check_not_local_replica(replicated, query); - DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.shard, query.replica, /*throw_if_noop*/ false); LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); } } @@ -992,7 +992,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) if (auto * replicated = dynamic_cast(elem.second.get())) check_not_local_replica(replicated, query); - DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.shard, query.replica); + DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.shard, query.replica, /*throw_if_noop*/ true); LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); } else diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 47a6cf10bda..d5b3ceef46a 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -55,7 +55,15 @@ $CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create databas $CLICKHOUSE_CLIENT -q "system sync database replica $db4" $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db4'" +# Don't throw "replica doesn't exist" when removing all replicas [from a database] +$CLICKHOUSE_CLIENT -q "system drop database replica 'doesntexist$CLICKHOUSE_DATABASE' from shard 'doesntexist'" + $CLICKHOUSE_CLIENT -q "drop database $db" $CLICKHOUSE_CLIENT -q "drop database $db2" $CLICKHOUSE_CLIENT -q "drop database $db3" + +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db4.rmt (n int) engine=ReplicatedMergeTree order by n" +$CLICKHOUSE_CLIENT -q "system drop replica 'doesntexist$CLICKHOUSE_DATABASE' from database $db4" +$CLICKHOUSE_CLIENT -q "system drop replica 'doesntexist$CLICKHOUSE_DATABASE'" + $CLICKHOUSE_CLIENT -q "drop database $db4" From 88920f08f131ccca14aa8e1e394264e73725181b Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Wed, 29 Nov 2023 17:09:56 +0800 Subject: [PATCH 015/137] add tests and fix whitespace issue --- src/Storages/IStorage.h | 4 ++-- ...m_columns_and_system_tables_long.reference | 5 +++- ..._system_columns_and_system_tables_long.sql | 24 ++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6bae60bbfd6..db979f38535 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -696,8 +696,8 @@ public: /// - For total_bytes_uncompressed column in system.tables /// /// Does not takes underlying Storage (if any) into account - virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } - + virtual std::optional totalBytesUncompressed(const 
Settings &) const { return {}; } + /// Number of rows INSERTed since server start. /// /// Does not take the underlying Storage (if any) into account. diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference index 58f8b7abfb3..9a7bb683cde 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference @@ -6,7 +6,7 @@ │ name2 │ 1 │ 0 │ 0 │ 0 │ │ name3 │ 0 │ 0 │ 0 │ 0 │ └───────┴─────────────────────┴───────────────────┴───────────────────┴────────────────────┘ -231 1 +3 231 1 ┌─name────────────────┬─partition_key─┬─sorting_key───┬─primary_key─┬─sampling_key─┐ │ check_system_tables │ date │ date, version │ date │ │ └─────────────────────┴───────────────┴───────────────┴─────────────┴──────────────┘ @@ -51,3 +51,6 @@ Check total_bytes/total_rows for Set Check total_bytes/total_rows for Join 10240 50 10240 100 +Check total_uncompressed_bytes/total_bytes/total_rows for Materialized views +0 0 0 +117 397 1 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 16085c8a995..0ffcc77a405 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -23,7 +23,7 @@ FROM system.columns WHERE table = 'check_system_tables' AND database = currentDa FORMAT PrettyCompactNoEscapes; INSERT INTO check_system_tables VALUES (1, 1, 1); -SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); +SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); DROP TABLE IF EXISTS check_system_tables; @@ -68,6 +68,8 @@ FORMAT PrettyCompactNoEscapes; DROP TABLE IF EXISTS check_system_tables; + + SELECT 'Check total_bytes/total_rows for TinyLog'; CREATE TABLE check_system_tables (key UInt8) ENGINE = TinyLog(); SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); @@ -138,3 +140,23 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tab INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); DROP TABLE check_system_tables; + +-- Build MergeTree table for Materialized view +CREATE TABLE check_system_tables + ( + name1 UInt8, + name2 UInt8, + name3 UInt8 + ) ENGINE = MergeTree() + ORDER BY name1 + PARTITION BY name2 + SAMPLE BY name1 + SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; + +SELECT 'Check total_uncompressed_bytes/total_bytes/total_rows for Materialized views'; +CREATE MATERIALIZED VIEW check_system_tables_mv ENGINE = MergeTree() ORDER BY name2 AS SELECT name1, name2, name3 FROM check_system_tables; +SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); +INSERT INTO check_system_tables VALUES (1, 1, 1); +SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND 
database = currentDatabase(); +DROP TABLE check_system_tables_mv; +DROP TABLE check_system_tables; From 9f849fbe06fec0ba6b9ac9c37bb259ca214d3457 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 29 Nov 2023 17:12:04 +0100 Subject: [PATCH 016/137] fix --- src/Databases/DatabaseReplicated.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 0ebc5aa6023..7104c53de43 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1186,7 +1186,10 @@ void DatabaseReplicated::dropReplica( auto zookeeper = Context::getGlobalContextInstance()->getZooKeeper(); - String database_mark = zookeeper->get(database_zookeeper_path); + String database_mark; + bool db_path_exists = zookeeper->tryGet(database_zookeeper_path, database_mark); + if (!db_path_exists && !throw_if_noop) + return; if (database_mark != REPLICATED_DATABASE_MARK) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} does not look like a path of Replicated database", database_zookeeper_path); From 7fbe7462b68bc7c7a6c8145d0f8612424b17550a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 1 Dec 2023 19:12:05 +0000 Subject: [PATCH 017/137] add mutation command to apply deleted mask --- src/Interpreters/InterpreterAlterQuery.cpp | 2 + src/Interpreters/MutationsInterpreter.cpp | 147 ++++++++++++------ src/Interpreters/MutationsInterpreter.h | 3 + src/Parsers/ASTAlterQuery.cpp | 16 ++ src/Parsers/ASTAlterQuery.h | 1 + src/Parsers/ParserAlterQuery.cpp | 17 ++ src/Storages/MergeTree/MutateTask.cpp | 47 +++--- src/Storages/MutationCommands.cpp | 9 ++ src/Storages/MutationCommands.h | 1 + src/Storages/StorageSnapshot.cpp | 1 + .../02932_apply_deleted_mask.reference | 6 + .../0_stateless/02932_apply_deleted_mask.sql | 22 +++ 12 files changed, 207 insertions(+), 65 deletions(-) create mode 100644 tests/queries/0_stateless/02932_apply_deleted_mask.reference create mode 100644 tests/queries/0_stateless/02932_apply_deleted_mask.sql diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 96bb7615416..0afdb3bab57 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -155,6 +155,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); + if (!getContext()->getSettings().allow_experimental_statistic && ( command_ast->type == ASTAlterCommand::ADD_STATISTIC || command_ast->type == ASTAlterCommand::DROP_STATISTIC || @@ -407,6 +408,7 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS break; } case ASTAlterCommand::DELETE: + case ASTAlterCommand::APPLY_DELETED_MASK: case ASTAlterCommand::DROP_PARTITION: case ASTAlterCommand::DROP_DETACHED_PARTITION: { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 8e56b08f1ed..4cef15f6220 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -153,19 +154,29 @@ bool isStorageTouchedByMutations( return false; bool all_commands_can_be_skipped = true; - for (const MutationCommand & command : commands) + for (const auto & command : commands) { - if (!command.predicate) /// The command touches all rows. 
- return true; - - if (command.partition) + if (command.type == MutationCommand::APPLY_DELETED_MASK) { - const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); - if (partition_id == source_part->info.partition_id) - all_commands_can_be_skipped = false; + if (source_part->hasLightweightDelete()) + return true; } else - all_commands_can_be_skipped = false; + { + if (!command.predicate) /// The command touches all rows. + return true; + + if (command.partition) + { + const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); + if (partition_id == source_part->info.partition_id) + all_commands_can_be_skipped = false; + } + else + { + all_commands_can_be_skipped = false; + } + } } if (all_commands_can_be_skipped) @@ -211,7 +222,6 @@ bool isStorageTouchedByMutations( return count != 0; } - ASTPtr getPartitionAndPredicateExpressionForMutationCommand( const MutationCommand & command, const StoragePtr & storage, @@ -244,6 +254,32 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( return command.predicate ? command.predicate->clone() : partition_predicate_as_ast_func; } + +MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) +{ + if (command.type != MutationCommand::APPLY_DELETED_MASK) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected APPLY_DELETED_MASK mutation command, got: {}", magic_enum::enum_name(command.type)); + + auto alter_command = std::make_shared(); + alter_command->type = ASTAlterCommand::DELETE; + alter_command->partition = command.partition; + + auto row_exists_predicate = makeASTFunction("equals", + std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), + std::make_shared(Field(0))); + + if (command.predicate) + alter_command->predicate = makeASTFunction("and", row_exists_predicate, command.predicate); + else + alter_command->predicate = row_exists_predicate; + + auto mutation_command = MutationCommand::parse(alter_command.get()); + if (!mutation_command) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to parse command {}. It's a bug", queryToString(alter_command)); + + return *mutation_command; +} + MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_)) { } @@ -517,15 +553,18 @@ void MutationsInterpreter::prepare(bool dry_run) NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); - for (const MutationCommand & command : commands) + for (auto & command : commands) { - if (command.type == MutationCommand::Type::UPDATE - || command.type == MutationCommand::Type::DELETE) + if (command.type == MutationCommand::Type::APPLY_DELETED_MASK) + command = createCommandToApplyDeletedMask(command); + + if (command.type == MutationCommand::Type::UPDATE || command.type == MutationCommand::Type::DELETE) materialize_ttl_recalculate_only = false; for (const auto & [name, _] : command.column_to_update_expression) { - if (!available_columns_set.contains(name) && name != LightweightDeleteDescription::FILTER_COLUMN.name + if (!available_columns_set.contains(name) + && name != LightweightDeleteDescription::FILTER_COLUMN.name && name != BlockNumberColumn::name) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is updated but not requested to read", name); @@ -574,7 +613,7 @@ void MutationsInterpreter::prepare(bool dry_run) std::vector read_columns; /// First, break a sequence of commands into stages. 
- for (auto & command : commands) + for (const auto & command : commands) { // we can return deleted rows only if it's the only present command assert(command.type == MutationCommand::DELETE || command.type == MutationCommand::UPDATE || !settings.return_mutated_rows); @@ -585,7 +624,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (stages.empty() || !stages.back().column_to_updated.empty()) stages.emplace_back(context); - auto predicate = getPartitionAndPredicateExpressionForMutationCommand(command); + auto predicate = getPartitionAndPredicateExpressionForMutationCommand(command); if (!settings.return_mutated_rows) predicate = makeASTFunction("isZeroOrNull", predicate); @@ -605,16 +644,12 @@ void MutationsInterpreter::prepare(bool dry_run) NameSet affected_materialized; - for (const auto & kv : command.column_to_update_expression) + for (const auto & [column_name, update_expr] : command.column_to_update_expression) { - const String & column = kv.first; - - auto materialized_it = column_to_affected_materialized.find(column); + auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) - { - for (const String & mat_column : materialized_it->second) + for (const auto & mat_column : materialized_it->second) affected_materialized.emplace(mat_column); - } /// When doing UPDATE column = expression WHERE condition /// we will replace column to the result of the following expression: @@ -627,33 +662,39 @@ void MutationsInterpreter::prepare(bool dry_run) /// Outer CAST is added just in case if we don't trust the returning type of 'if'. DataTypePtr type; - if (auto physical_column = columns_desc.tryGetPhysical(column)) + if (auto physical_column = columns_desc.tryGetPhysical(column_name)) + { type = physical_column->type; - else if (column == LightweightDeleteDescription::FILTER_COLUMN.name) + } + else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + { type = LightweightDeleteDescription::FILTER_COLUMN.type; - else if (column == BlockNumberColumn::name) + deleted_mask_updated = true; + } + else if (column_name == BlockNumberColumn::name) + { type = BlockNumberColumn::type; + } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column); + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name); + } auto type_literal = std::make_shared(type->getName()); - - const auto & update_expr = kv.second; - ASTPtr condition = getPartitionAndPredicateExpressionForMutationCommand(command); /// And new check validateNestedArraySizes for Nested subcolumns - if (isArray(type) && !Nested::splitName(column).second.empty()) + if (isArray(type) && !Nested::splitName(column_name).second.empty()) { std::shared_ptr function = nullptr; - auto nested_update_exprs = getExpressionsOfUpdatedNestedSubcolumns(column, all_columns, command.column_to_update_expression); + auto nested_update_exprs = getExpressionsOfUpdatedNestedSubcolumns(column_name, all_columns, command.column_to_update_expression); if (!nested_update_exprs) { function = makeASTFunction("validateNestedArraySizes", condition, update_expr->clone(), - std::make_shared(column)); + std::make_shared(column_name)); condition = makeASTFunction("and", condition, function); } else if (nested_update_exprs->size() > 1) @@ -675,10 +716,10 @@ void MutationsInterpreter::prepare(bool dry_run) makeASTFunction("_CAST", update_expr->clone(), type_literal), - std::make_shared(column)), + std::make_shared(column_name)), type_literal); - 
stages.back().column_to_updated.emplace(column, updated_column); + stages.back().column_to_updated.emplace(column_name, updated_column); if (condition && settings.return_mutated_rows) stages.back().filters.push_back(condition); @@ -987,26 +1028,41 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// Add _row_exists column if it is present in the part if (source.hasLightweightDeleteMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); + + bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) { if (settings.return_all_columns || !prepared_stages[i].filters.empty()) { for (const auto & column : all_columns) + { + if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated) + continue; + prepared_stages[i].output_columns.insert(column.name); - continue; + } + + has_filters = true; } + else + { + if (i > 0) + prepared_stages[i].output_columns = prepared_stages[i - 1].output_columns; - if (i > 0) - prepared_stages[i].output_columns = prepared_stages[i - 1].output_columns; + /// Make sure that all updated columns are included into output_columns set. + /// This is important for a "hidden" column like _row_exists gets because it is a virtual column + /// and so it is not in the list of AllPhysical columns. + for (const auto & [column_name, _] : prepared_stages[i].column_to_updated) + { + if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated) + continue; - /// Make sure that all updated columns are included into output_columns set. - /// This is important for a "hidden" column like _row_exists gets because it is a virtual column - /// and so it is not in the list of AllPhysical columns. - for (const auto & kv : prepared_stages[i].column_to_updated) - prepared_stages[i].output_columns.insert(kv.first); + prepared_stages[i].output_columns.insert(column_name); + } + } } /// Now, calculate `expressions_chain` for each stage except the first. @@ -1024,7 +1080,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s all_asts->children.push_back(kv.second); /// Add all output columns to prevent ExpressionAnalyzer from deleting them from source columns. - for (const String & column : stage.output_columns) + for (const auto & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); /// Executing scalar subquery on that stage can lead to deadlock @@ -1081,7 +1137,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s actions_chain.getLastStep().addRequiredOutput(name); actions_chain.getLastActions(); - actions_chain.finalize(); if (i) diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 1372ea77f4f..eda94190185 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -32,6 +32,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( ContextPtr context ); +MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command); + /// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs) /// to this data. 
class MutationsInterpreter @@ -213,6 +215,7 @@ private: std::unique_ptr updated_header; std::vector stages; bool is_prepared = false; /// Has the sequence of stages been prepared. + bool deleted_mask_updated = false; NameSet materialized_indices; NameSet materialized_projections; diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 84893011222..ea116e6ccfd 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -466,6 +466,22 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO "; rename_to->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::APPLY_DELETED_MASK) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "APPLY DELETED MASK" << (settings.hilite ? hilite_none : ""); + + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + + if (predicate) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); + predicate->formatImpl(settings, state, frame); + } + } else throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index e601739595f..77c540aed33 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -71,6 +71,7 @@ public: DELETE, UPDATE, + APPLY_DELETED_MASK, NO_TYPE, diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index c616c6e0441..3522611ec4c 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -111,6 +111,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_remove_ttl("REMOVE TTL"); ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY"); + ParserKeyword s_apply_deleted_mask("APPLY DELETED MASK"); ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; @@ -823,6 +824,22 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MODIFY_COMMENT; } + else if (s_apply_deleted_mask.ignore(pos, expected)) + { + command->type = ASTAlterCommand::APPLY_DELETED_MASK; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + + if (s_where.ignore(pos, expected)) + { + if (!parser_exp_elem.parse(pos, command->predicate, expected)) + return false; + } + } else return false; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6b6b5947581..fc36840cf92 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -51,7 +51,6 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } - /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. 
@@ -79,7 +78,8 @@ static void splitAndModifyMutationCommands( || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE - || command.type == MutationCommand::Type::UPDATE) + || command.type == MutationCommand::Type::UPDATE + || command.type == MutationCommand::Type::APPLY_DELETED_MASK) { for_interpreter.push_back(command); for (const auto & [column_name, expr] : command.column_to_update_expression) @@ -202,7 +202,8 @@ static void splitAndModifyMutationCommands( || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE - || command.type == MutationCommand::Type::UPDATE) + || command.type == MutationCommand::Type::UPDATE + || command.type == MutationCommand::Type::APPLY_DELETED_MASK) { for_interpreter.push_back(command); } @@ -257,15 +258,12 @@ getColumnsForNewDataPart( NameToNameMap renamed_columns_from_to; ColumnsDescription part_columns(source_part->getColumns()); NamesAndTypesList system_columns; - if (source_part->supportLightweightDeleteMutate()) - system_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); - /// Preserve system columns that have persisted values in the source_part - for (const auto & column : system_columns) - { - if (part_columns.has(column.name) && !storage_columns.contains(column.name)) - storage_columns.emplace_back(column); - } + const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN; + bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate(); + + bool deleted_mask_updated = false; + bool has_delete_command = false; NameSet storage_columns_set; for (const auto & [name, _] : storage_columns) @@ -277,23 +275,22 @@ getColumnsForNewDataPart( { for (const auto & [column_name, _] : command.column_to_update_expression) { - /// Allow to update and persist values of system column - auto column = system_columns.tryGetByName(column_name); - if (column && !storage_columns.contains(column_name)) - storage_columns.emplace_back(column_name, column->type); + if (column_name == deleted_mask_column.name + && supports_lightweight_deletes + && !storage_columns_set.contains(deleted_mask_column.name)) + deleted_mask_updated = true; } } + if (command.type == MutationCommand::DELETE || command.type == MutationCommand::APPLY_DELETED_MASK) + has_delete_command = true; + /// If we don't have this column in source part, than we don't need to materialize it if (!part_columns.has(command.column_name)) - { continue; - } if (command.type == MutationCommand::DROP_COLUMN) - { removed_columns.insert(command.column_name); - } if (command.type == MutationCommand::RENAME_COLUMN) { @@ -302,6 +299,15 @@ getColumnsForNewDataPart( } } + if (!storage_columns_set.contains(deleted_mask_column.name)) + { + if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command)) + { + storage_columns.push_back(deleted_mask_column); + storage_columns_set.insert(deleted_mask_column.name); + } + } + SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { @@ -1900,6 +1906,9 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con return true; } + if (command.type == MutationCommand::APPLY_DELETED_MASK && !part->hasLightweightDelete()) + return true; + if (canSkipConversionToNullable(part, command)) return true; diff --git 
a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 03200d0d9fa..36388a32b41 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -59,6 +59,15 @@ std::optional<MutationCommand> MutationCommand::parse(ASTAlterCommand * command, } return res; } + else if (command->type == ASTAlterCommand::APPLY_DELETED_MASK) + { + MutationCommand res; + res.ast = command->ptr(); + res.type = APPLY_DELETED_MASK; + res.predicate = command->predicate; + res.partition = command->partition; + return res; + } else if (command->type == ASTAlterCommand::MATERIALIZE_INDEX) { MutationCommand res; diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 014a227dff3..6e10f7d9b2d 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -39,6 +39,7 @@ struct MutationCommand MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, + APPLY_DELETED_MASK, ALTER_WITHOUT_MUTATION, /// pure metadata command, currently unusned }; diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a22ba6586ac..34c092c7208 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -35,6 +35,7 @@ void StorageSnapshot::init() if (storage.hasLightweightDeletedMask()) system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type; + system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; } diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.reference b/tests/queries/0_stateless/02932_apply_deleted_mask.reference new file mode 100644 index 00000000000..0d75f7c1b30 --- /dev/null +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.reference @@ -0,0 +1,6 @@ +10 45 +all_1_1_0 10 0 +7 33 +all_1_1_0_2 10 1 +7 33 +all_1_1_0_3 7 0 diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.sql b/tests/queries/0_stateless/02932_apply_deleted_mask.sql new file mode 100644 index 00000000000..602c67de52e --- /dev/null +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS t_materialize_delete; + +CREATE TABLE t_materialize_delete (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY tuple() settings min_bytes_for_wide_part = 0; + +SET mutations_sync = 2; + +INSERT INTO t_materialize_delete SELECT number, number FROM numbers(10); + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +DELETE FROM t_materialize_delete WHERE id % 3 = 1; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +ALTER TABLE t_materialize_delete APPLY DELETED MASK; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +DROP TABLE t_materialize_delete; From 8d30e22a09613257e049fd07414eeaf0b7bb9097 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Dec 2023 13:00:50 +0000 Subject: [PATCH 018/137] fix lightweight delete with heavy delete --- src/Interpreters/MutationsInterpreter.cpp | 4 +-- src/Storages/MergeTree/MergeTask.cpp | 4 +++ .../MergeTree/MergeTreeSequentialSource.cpp | 36 +++++++++++++------ .../MergeTree/MergeTreeSequentialSource.h | 4 ++-
.../02352_lightweight_delete.reference | 2 +- ...02521_lightweight_delete_and_ttl.reference | 2 +- 6 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 4cef15f6220..d333477f36e 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1030,7 +1030,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s if (source.hasLightweightDeleteMask()) all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); - bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) @@ -1046,6 +1045,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s } has_filters = true; + settings.apply_deleted_mask = true; } else { @@ -1279,7 +1279,7 @@ void MutationsInterpreter::Source::read( VirtualColumns virtual_columns(std::move(required_columns), part); - createMergeTreeSequentialSource( + createReadFromPartStep( plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e8e307bb148..94bd0f98986 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -556,6 +556,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const global_ctx->storage_snapshot, global_ctx->future_part->parts[part_num], column_names, + /*mark_ranges=*/ {}, + /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, /*take_column_types_from_storage=*/ true, /*quiet=*/ false, @@ -909,6 +911,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->storage_snapshot, part, global_ctx->merging_column_names, + /*mark_ranges=*/ {}, + /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, /*take_column_types_from_storage=*/ true, /*quiet=*/ false, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index a586997360a..5075e43448a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -131,6 +131,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() .withSystemColumns(); + if (storage.supportsSubcolumns()) options.withSubcolumns(); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); @@ -241,19 +242,24 @@ Pipe createMergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, Names columns_to_read, + std::optional mark_ranges, + bool apply_deleted_mask, bool read_with_direct_io, bool take_column_types_from_storage, bool quiet, std::shared_ptr> filtered_rows_count) { + const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN; + /// The part might have some rows masked by lightweight deletes - const bool need_to_filter_deleted_rows = data_part->hasLightweightDelete(); - auto columns = columns_to_read; - if (need_to_filter_deleted_rows) - columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name); + const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete(); + const bool has_filter_column = std::ranges::find(columns_to_read, 
filter_column.name) != columns_to_read.end(); + + if (need_to_filter_deleted_rows && !has_filter_column) + columns_to_read.emplace_back(filter_column.name); auto column_part_source = std::make_shared( - storage, storage_snapshot, data_part, columns, std::optional{}, + storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), /*apply_deleted_mask=*/ false, read_with_direct_io, take_column_types_from_storage, quiet); Pipe pipe(std::move(column_part_source)); @@ -261,10 +267,10 @@ Pipe createMergeTreeSequentialSource( /// Add filtering step that discards deleted rows if (need_to_filter_deleted_rows) { - pipe.addSimpleTransform([filtered_rows_count](const Block & header) + pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header) { return std::make_shared( - header, nullptr, LightweightDeleteDescription::FILTER_COLUMN.name, true, false, filtered_rows_count); + header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count); }); } @@ -325,9 +331,17 @@ public: } } - auto source = std::make_unique( - storage, storage_snapshot, data_part, columns_to_read, - std::move(mark_ranges), apply_deleted_mask, false, true); + auto source = createMergeTreeSequentialSource( + storage, + storage_snapshot, + data_part, + columns_to_read, + std::move(mark_ranges), + apply_deleted_mask, + /*read_with_direct_io=*/ false, + /*take_column_types_from_storage=*/ true, + /*quiet=*/ false, + /*filtered_rows_count=*/ nullptr); pipeline.init(Pipe(std::move(source))); } @@ -343,7 +357,7 @@ private: Poco::Logger * log; }; -void createMergeTreeSequentialSource( +void createReadFromPartStep( QueryPlan & plan, const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index fb249568e8f..396d3f76886 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -15,6 +15,8 @@ Pipe createMergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, Names columns_to_read, + std::optional mark_ranges, + bool apply_deleted_mask, bool read_with_direct_io, bool take_column_types_from_storage, bool quiet, @@ -22,7 +24,7 @@ Pipe createMergeTreeSequentialSource( class QueryPlan; -void createMergeTreeSequentialSource( +void createReadFromPartStep( QueryPlan & plan, const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, diff --git a/tests/queries/0_stateless/02352_lightweight_delete.reference b/tests/queries/0_stateless/02352_lightweight_delete.reference index 3386b3294c3..ce7c6e81ac8 100644 --- a/tests/queries/0_stateless/02352_lightweight_delete.reference +++ b/tests/queries/0_stateless/02352_lightweight_delete.reference @@ -26,7 +26,7 @@ Rows in parts 800000 Count 700000 First row 300000 1 Do ALTER DELETE mutation that does a "heavyweight" delete -Rows in parts 533333 +Rows in parts 466666 Count 466666 First row 300001 10 Delete 100K more rows using lightweight DELETE diff --git a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference index 3b40d9048cd..e60b2a184db 100644 --- a/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference +++ b/tests/queries/0_stateless/02521_lightweight_delete_and_ttl.reference @@ -15,7 +15,7 @@ SELECT 'Count', count() FROM lwd_test_02521; Count 25000 ALTER TABLE lwd_test_02521 
DELETE WHERE id >= 40000 SETTINGS mutations_sync = 1; SELECT 'Rows in parts', SUM(rows) FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_test_02521' AND active; -Rows in parts 40000 +Rows in parts 15000 SELECT 'Count', count() FROM lwd_test_02521; Count 15000 OPTIMIZE TABLE lwd_test_02521 FINAL SETTINGS mutations_sync = 1; From 4ec21e94a528a14c73bddd083c4ecda44b3488bc Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 4 Dec 2023 16:58:51 +0100 Subject: [PATCH 019/137] Set http_make_head_request only in stress tests --- tests/ci/stress.py | 3 +++ tests/clickhouse-test | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index ae918363df7..91b8694623b 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -68,6 +68,9 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.1: client_options.append("optimize_trivial_approximate_count_query=1") + if random.random() < 0.3: + client_options.append(f"http_make_head_request={random.randint(0, 1)}") + if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 60416521ed1..4b551ed3663 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -631,7 +631,6 @@ class SettingsRandomizer: get_localzone(), ] ), - "http_make_head_request": lambda: random.randint(0, 1), } @staticmethod From 04d167c6d9db3ccd91e3e36b587082e09e3c305f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 5 Dec 2023 13:34:37 +0100 Subject: [PATCH 020/137] Better --- programs/server/Server.cpp | 6 ++--- src/BridgeHelper/LibraryBridgeHelper.cpp | 2 +- src/BridgeHelper/XDBCBridgeHelper.h | 2 +- src/Core/ServerSettings.h | 4 ++++ src/Dictionaries/HTTPDictionarySource.cpp | 4 ++-- src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Disks/IO/ReadBufferFromWebServer.cpp | 5 ++-- .../ObjectStorages/Web/WebObjectStorage.cpp | 2 +- src/IO/ConnectionTimeouts.cpp | 16 +++++++++++++ src/IO/ConnectionTimeouts.h | 3 +++ src/IO/ReadWriteBufferFromHTTP.cpp | 5 ++-- src/IO/ReadWriteBufferFromHTTP.h | 15 ++++++------ src/Interpreters/Context.cpp | 14 +++++++++++ src/Interpreters/Context.h | 4 ++++ .../HTTP/WriteBufferFromHTTPServerResponse.h | 2 +- src/Server/HTTPHandler.cpp | 4 +--- src/Server/InterserverIOHTTPHandler.cpp | 3 +-- src/Server/PrometheusRequestHandler.cpp | 5 ++-- src/Server/ReplicasStatusHandler.cpp | 4 ++-- src/Server/StaticRequestHandler.cpp | 4 ++-- src/Server/WebUIRequestHandler.cpp | 3 ++- src/Storages/MergeTree/DataPartsExchange.cpp | 3 +-- src/Storages/MergeTree/MergeTreeSettings.h | 8 +++---- src/Storages/StorageReplicatedMergeTree.cpp | 23 +++---------------- src/Storages/StorageReplicatedMergeTree.h | 4 ---- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/TableFunctions/ITableFunctionXDBC.cpp | 2 +- 28 files changed, 83 insertions(+), 70 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index de0cfb9b9fa..778adfeb84d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2086,10 +2086,9 @@ void Server::createServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); +
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); @@ -2343,10 +2342,9 @@ void Server::createInterserverServers( { const Settings & settings = global_context->getSettingsRef(); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) diff --git a/src/BridgeHelper/LibraryBridgeHelper.cpp b/src/BridgeHelper/LibraryBridgeHelper.cpp index 60588951c32..e83707595b9 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.cpp +++ b/src/BridgeHelper/LibraryBridgeHelper.cpp @@ -12,7 +12,7 @@ LibraryBridgeHelper::LibraryBridgeHelper(ContextPtr context_) , http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value) , bridge_host(config.getString("library_bridge.host", DEFAULT_HOST)) , bridge_port(config.getUInt("library_bridge.port", DEFAULT_PORT)) - , http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout)) { } diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index 44104f26f63..060de74b5b1 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -162,7 +162,7 @@ private: ConnectionTimeouts getHTTPTimeouts() { - return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), {getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings().keep_alive_timeout); } protected: diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6785eea26ea..f6ceb652900 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -97,6 +97,10 @@ namespace DB M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ \ + M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ + M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ + M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. 
Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index c12f4fedf3f..689593a969e 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -38,7 +38,7 @@ HTTPDictionarySource::HTTPDictionarySource( , configuration(configuration_) , sample_block(sample_block_) , context(context_) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout)) { credentials.setUsername(credentials_.getUsername()); credentials.setPassword(credentials_.getPassword()); @@ -51,7 +51,7 @@ HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) , configuration(other.configuration) , sample_block(other.sample_block) , context(Context::createCopy(other.context)) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout)) { credentials.setUsername(other.credentials.getUsername()); credentials.setPassword(other.credentials.getPassword()); diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 23dc7db508d..080f7db96be 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -76,7 +76,7 @@ XDBCDictionarySource::XDBCDictionarySource( , load_all_query(query_builder.composeLoadAllQuery()) , bridge_helper(bridge_) , bridge_url(bridge_helper->getMainURI()) - , timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0})) + , timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout)) { auto url_params = bridge_helper->getURLParams(max_block_size); for (const auto & [name, value] : url_params) diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp 
b/src/Disks/IO/ReadBufferFromWebServer.cpp index 46d8c41ff78..90cd5285875 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -54,8 +54,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() } const auto & settings = context->getSettingsRef(); - const auto & config = context->getConfigRef(); - Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}; + const auto & server_settings = context->getServerSettings(); auto res = std::make_unique( uri, @@ -65,7 +64,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() settings.http_send_timeout, std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)), settings.tcp_keep_alive_timeout, - http_keep_alive_timeout), + server_settings.keep_alive_timeout), credentials, 0, buf_size, diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 0103188b562..f3b0cb8b9a0 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -47,7 +47,7 @@ void WebObjectStorage::initialize(const String & uri_path, const std::unique_loc ReadWriteBufferFromHTTP::OutStreamCallback(), ConnectionTimeouts::getHTTPTimeouts( getContext()->getSettingsRef(), - {getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + getContext()->getServerSettings().keep_alive_timeout), credentials, /* max_redirects= */ 0, /* buffer_size_= */ DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 970afc75ec3..88073a72d78 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -133,6 +133,22 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings settings.http_receive_timeout); } +ConnectionTimeouts ConnectionTimeouts::getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings) +{ + auto timeouts = getHTTPTimeouts(user_settings, server_settings.keep_alive_timeout); + + if (server_settings.replicated_fetches_http_connection_timeout.changed) + timeouts.connection_timeout = server_settings.replicated_fetches_http_connection_timeout; + + if (server_settings.replicated_fetches_http_send_timeout.changed) + timeouts.send_timeout = server_settings.replicated_fetches_http_send_timeout; + + if (server_settings.replicated_fetches_http_receive_timeout.changed) + timeouts.receive_timeout = server_settings.replicated_fetches_http_receive_timeout; + + return timeouts; +} + class SendReceiveTimeoutsForFirstAttempt { private: diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index aabebdb836d..42c4312d1d8 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -68,6 +69,8 @@ struct ConnectionTimeouts static ConnectionTimeouts getTCPTimeoutsWithFailover(const Settings & settings); static ConnectionTimeouts getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout); + static ConnectionTimeouts getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings); + ConnectionTimeouts getAdaptiveTimeouts(const String & method, bool first_attempt, bool first_byte) const; }; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6dd6269e16f..858b2f7cb03 100644 --- 
a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -920,13 +920,12 @@ PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP( Poco::URI uri_, const std::string & method_, OutStreamCallback out_stream_callback_, - const ConnectionTimeouts & timeouts_, const Poco::Net::HTTPBasicCredentials & credentials_, size_t buffer_size_, const UInt64 max_redirects, - size_t max_connections_per_endpoint) + PooledSessionFactoryPtr session_factory) : Parent( - std::make_shared(uri_, max_redirects, std::make_shared(timeouts_, max_connections_per_endpoint)), + std::make_shared(uri_, max_redirects, session_factory), uri_, credentials_, method_, diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 8f0e2388e5b..29c0804bb28 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -265,6 +265,8 @@ private: size_t per_endpoint_pool_size; }; +using PooledSessionFactoryPtr = std::shared_ptr; + class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>> { using SessionType = UpdatableSession; @@ -273,13 +275,12 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase public: explicit PooledReadWriteBufferFromHTTP( Poco::URI uri_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - const ConnectionTimeouts & timeouts_ = {}, - const Poco::Net::HTTPBasicCredentials & credentials_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const UInt64 max_redirects = 0, - size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT); + const std::string & method_, + OutStreamCallback out_stream_callback_, + const Poco::Net::HTTPBasicCredentials & credentials_, + size_t buffer_size_, + const UInt64 max_redirects, + PooledSessionFactoryPtr session_factory); }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1c8a505a128..3623f7e4d0a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -354,6 +355,8 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex); OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex); + /// The global pool of HTTP sessions for background fetches. + PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex); RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml @@ -4779,6 +4782,11 @@ void Context::initializeBackgroundExecutorsIfNeeded() ); LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size); + auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef()); + /// The number of background fetches is limited by the number of threads in the background thread pool. + /// It doesn't make any sense to limit the number of connections per host any further. 
+ shared->fetches_session_factory = std::make_shared(timeouts, background_fetches_pool_size); + shared->fetch_executor = std::make_shared ( "Fetch", @@ -4832,6 +4840,12 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } +PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const +{ + SharedLockGuard lock(shared->background_executors_mutex); + return shared->fetches_session_factory; +} + IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const { callOnce(shared->readers_initialized, [&] { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8e0522326f5..7a3a1236f27 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -201,6 +201,9 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; +class PooledSessionFactory; +using PooledSessionFactoryPtr = std::shared_ptr; + class SessionTracker; struct ServerSettings; @@ -1207,6 +1210,7 @@ public: OrdinaryBackgroundExecutorPtr getMovesExecutor() const; OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; + PooledSessionFactoryPtr getCommonFetchesSessionFactory() const; IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 94202e1e0e8..38345f27952 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -36,7 +36,7 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - size_t keep_alive_timeout_, + UInt64 keep_alive_timeout_, bool compress_ = false, /// If true - set Content-Encoding header and compress the result. CompressionMethod compression_method_ = CompressionMethod::None); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f9cd3b40f4a..f092e5f5cc5 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -616,12 +616,10 @@ void HTTPHandler::processQuery( size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE; size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? 
buffer_size_total : 0; - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); - used_output.out = std::make_shared( response, request.getMethod() == HTTPRequest::HTTP_HEAD, - keep_alive_timeout, + context->getServerSettings().keep_alive_timeout.totalSeconds(), client_supports_http_compression, http_response_compression_method); diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 5f6da208778..53773a83b40 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -87,8 +87,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setChunkedTransferEncoding(true); Output used_output; - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 6d35386bfb5..b871d1b0ddc 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -17,9 +18,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); - + const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); setResponseDefaultHeaders(response, keep_alive_timeout); response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index ad54b24f31d..b50fb955563 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -78,8 +78,8 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } } - const auto & config = getContext()->getConfigRef(); - setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT)); + const auto & server_settings = getContext()->getServerSettings(); + setResponseDefaultHeaders(response, server_settings.keep_alive_timeout.totalSeconds()); if (!ok) { diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index a7e85d161c1..34cb5d2d169 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes } static inline WriteBufferPtr -responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, unsigned int keep_alive_timeout) +responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout) { /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). 
String http_response_compression_methods = request.get("Accept-Encoding", ""); @@ -90,7 +90,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); try diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 12d2588723e..6277131fd5c 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #ifdef __clang__ @@ -37,7 +38,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_) void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); response.setContentType("text/html; charset=UTF-8"); diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 2bf2f9fddc7..c54947dde8e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -522,11 +522,10 @@ std::pair Fetcher::fetchSelected uri, Poco::Net::HTTPRequest::HTTP_POST, nullptr, - timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - static_cast(data_settings->replicated_max_parallel_fetches_for_host)); + context->getCommonFetchesSessionFactory()); int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index f55fb5b8fac..922eb266512 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -110,10 +110,6 @@ struct Settings; M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \ M(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \ M(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \ - M(UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT, "Limit parallel fetches from endpoint (actually pool size).", 0) \ - M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. 
Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ M(Bool, replicated_can_become_leader, true, "If true, Replicated tables replicas on this node will try to acquire leadership.", 0) \ M(Seconds, zookeeper_session_expiration_check_period, 60, "ZooKeeper session expiration check period, in seconds.", 0) \ M(Seconds, initialization_retry_period, 60, "Retry period for table initialization, in seconds.", 0) \ @@ -214,6 +210,10 @@ struct Settings; MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Bool, use_metadata_cache, false) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_enable_clear_old_broken_detached, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_connection_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_send_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_receive_timeout, 0) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) \ /// Settings that should not change after the creation of a table. /// NOLINTNEXTLINE diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8114db9241f..8f9147fd743 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -264,7 +264,7 @@ String StorageReplicatedMergeTree::getEndpointName() const static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { - return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(const String & partition_id) @@ -2711,7 +2711,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) { String source_replica_path = fs::path(zookeeper_path) / "replicas" / part_desc->replica; ReplicatedMergeTreeAddress address(getZooKeeper()->get(fs::path(source_replica_path) / "host")); - auto timeouts = getFetchPartHTTPTimeouts(getContext()); + auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getContext()->getServerSettings(), getContext()->getSettingsRef()); auto credentials = getContext()->getInterserverCredentials(); String interserver_scheme = getContext()->getInterserverScheme(); @@ -4183,23 +4183,6 @@ void StorageReplicatedMergeTree::stopBeingLeader() is_leader = false; } -ConnectionTimeouts StorageReplicatedMergeTree::getFetchPartHTTPTimeouts(ContextPtr local_context) -{ - auto timeouts = getHTTPTimeouts(local_context); - auto settings = getSettings(); - - if (settings->replicated_fetches_http_connection_timeout.changed) - timeouts.connection_timeout = settings->replicated_fetches_http_connection_timeout; - - if (settings->replicated_fetches_http_send_timeout.changed) - timeouts.send_timeout = settings->replicated_fetches_http_send_timeout; - - if (settings->replicated_fetches_http_receive_timeout.changed) - timeouts.receive_timeout = settings->replicated_fetches_http_receive_timeout; - - return timeouts; -} - bool StorageReplicatedMergeTree::checkReplicaHavePart(const String & replica, const String & part_name) { auto zookeeper = getZooKeeper(); @@ 
-4798,7 +4781,7 @@ bool StorageReplicatedMergeTree::fetchPart( else { address.fromString(zookeeper->get(fs::path(source_replica_path) / "host")); - timeouts = getFetchPartHTTPTimeouts(getContext()); + timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getContext()->getServerSettings(), getContext()->getSettingsRef()); credentials = getContext()->getInterserverCredentials(); interserver_scheme = getContext()->getInterserverScheme(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index a8ab8eb7013..d919bae9b82 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -761,10 +761,6 @@ private: int32_t alter_version, int32_t log_version); - /// Exchange parts. - - ConnectionTimeouts getFetchPartHTTPTimeouts(ContextPtr context); - /** Returns an empty string if no one has a part. */ String findReplicaHavingPart(const String & part_name, bool active); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index cf3aa5ac175..50eda3eac78 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -93,7 +93,7 @@ static bool urlWithGlobs(const String & uri) static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { - return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); + return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } IStorageURLBase::IStorageURLBase( diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 0ba8838d4c3..a569c50835c 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -142,7 +142,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet local_context, ConnectionTimeouts::getHTTPTimeouts( local_context->getSettingsRef(), - {local_context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + local_context->getServerSettings().keep_alive_timeout), compression_method); } diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index b1746ea769f..ca6d40a05a3 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -159,7 +159,7 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex {}, ConnectionTimeouts::getHTTPTimeouts( context->getSettingsRef(), - {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}), + context->getServerSettings().keep_alive_timeout), credentials); std::string columns_info; From c0e45c15fbb7ba5ee9f8f22c82fdaa613370bdb5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 Dec 2023 13:38:25 +0000 Subject: [PATCH 021/137] add test for lightweight deletes and mutations --- src/Interpreters/MutationsInterpreter.cpp | 2 +- src/Parsers/ASTAlterQuery.cpp | 6 --- src/Parsers/ParserAlterQuery.cpp | 6 --- .../02932_apply_deleted_mask.reference | 21 ++++++--- .../0_stateless/02932_apply_deleted_mask.sql | 33 +++++++++++--- .../02932_lwd_and_mutations.reference | 14 ++++++ .../0_stateless/02932_lwd_and_mutations.sql | 43 +++++++++++++++++++ 7 files changed, 100 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/02932_lwd_and_mutations.reference create mode 100644 tests/queries/0_stateless/02932_lwd_and_mutations.sql diff --git 
a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index d333477f36e..a492ea266cf 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1027,7 +1027,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s auto all_columns = storage_snapshot->getColumnsByNames(options, available_columns); /// Add _row_exists column if it is present in the part - if (source.hasLightweightDeleteMask()) + if (source.hasLightweightDeleteMask() || deleted_mask_updated) all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); bool has_filters = false; diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index ea116e6ccfd..ed9de6a46eb 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -475,12 +475,6 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); partition->formatImpl(settings, state, frame); } - - if (predicate) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); - predicate->formatImpl(settings, state, frame); - } } else throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 3522611ec4c..6c772db0193 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -833,12 +833,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_partition.parse(pos, command->partition, expected)) return false; } - - if (s_where.ignore(pos, expected)) - { - if (!parser_exp_elem.parse(pos, command->predicate, expected)) - return false; - } } else return false; diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.reference b/tests/queries/0_stateless/02932_apply_deleted_mask.reference index 0d75f7c1b30..22499472f84 100644 --- a/tests/queries/0_stateless/02932_apply_deleted_mask.reference +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.reference @@ -1,6 +1,15 @@ -10 45 -all_1_1_0 10 0 -7 33 -all_1_1_0_2 10 1 -7 33 -all_1_1_0_3 7 0 +Inserted +100 4950 +10 100 0 +Lighweight deleted +86 4271 +10 100 10 +Mask applied +86 4271 +10 86 0 +Lighweight deleted +72 3578 +10 86 10 +Mask applied in partition +72 3578 +10 84 9 diff --git a/tests/queries/0_stateless/02932_apply_deleted_mask.sql b/tests/queries/0_stateless/02932_apply_deleted_mask.sql index 602c67de52e..0ada0640a8f 100644 --- a/tests/queries/0_stateless/02932_apply_deleted_mask.sql +++ b/tests/queries/0_stateless/02932_apply_deleted_mask.sql @@ -1,22 +1,43 @@ DROP TABLE IF EXISTS t_materialize_delete; -CREATE TABLE t_materialize_delete (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY tuple() settings min_bytes_for_wide_part = 0; +CREATE TABLE t_materialize_delete (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id PARTITION BY id % 10; SET mutations_sync = 2; -INSERT INTO t_materialize_delete SELECT number, number FROM numbers(10); +INSERT INTO t_materialize_delete SELECT number, number FROM numbers(100); + +SELECT 'Inserted'; SELECT count(), sum(v) FROM t_materialize_delete; -SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts 
WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; -DELETE FROM t_materialize_delete WHERE id % 3 = 1; +SELECT 'Lighweight deleted'; + +DELETE FROM t_materialize_delete WHERE id % 7 = 3; SELECT count(), sum(v) FROM t_materialize_delete; -SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Mask applied'; ALTER TABLE t_materialize_delete APPLY DELETED MASK; SELECT count(), sum(v) FROM t_materialize_delete; -SELECT name, rows, has_lightweight_delete FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Lighweight deleted'; + +DELETE FROM t_materialize_delete WHERE id % 7 = 4; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; + +SELECT 'Mask applied in partition'; + +ALTER TABLE t_materialize_delete APPLY DELETED MASK IN PARTITION 5; + +SELECT count(), sum(v) FROM t_materialize_delete; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_materialize_delete' AND active; DROP TABLE t_materialize_delete; diff --git a/tests/queries/0_stateless/02932_lwd_and_mutations.reference b/tests/queries/0_stateless/02932_lwd_and_mutations.reference new file mode 100644 index 00000000000..dc0d3536b8f --- /dev/null +++ b/tests/queries/0_stateless/02932_lwd_and_mutations.reference @@ -0,0 +1,14 @@ +900 0 [1,2,3,4,5,6,7,8,9] +1 1000 1 +800 200 [2,3,4,5,6,7,8,9] +1 800 0 +700 150 [3,4,5,6,7,8,9] +1 800 1 +600 300 [4,5,6,7,8,9] +1 600 0 +400 200 [6,7,8,9] +1 500 1 +200 100 [8,9] +1 300 1 +200 100 [8,9] +1 200 0 diff --git a/tests/queries/0_stateless/02932_lwd_and_mutations.sql b/tests/queries/0_stateless/02932_lwd_and_mutations.sql new file mode 100644 index 00000000000..a68aca91764 --- /dev/null +++ b/tests/queries/0_stateless/02932_lwd_and_mutations.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_lwd_mutations; + +CREATE TABLE t_lwd_mutations(id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; +INSERT INTO t_lwd_mutations SELECT number, 0 FROM numbers(1000); + +SET mutations_sync = 2; + +DELETE FROM t_lwd_mutations WHERE id % 10 = 0; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE v = 1 WHERE id % 4 = 0, DELETE WHERE id % 10 = 1; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +DELETE FROM t_lwd_mutations WHERE id % 10 = 2; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE 
v = 1 WHERE id % 4 = 1, DELETE WHERE id % 10 = 3; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations UPDATE _row_exists = 0 WHERE id % 10 = 4, DELETE WHERE id % 10 = 5; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations DELETE WHERE id % 10 = 6, UPDATE _row_exists = 0 WHERE id % 10 = 7; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +ALTER TABLE t_lwd_mutations APPLY DELETED MASK; + +SELECT count(), sum(v), arraySort(groupUniqArray(id % 10)) FROM t_lwd_mutations; +SELECT count(), sum(rows), sum(has_lightweight_delete) FROM system.parts WHERE database = currentDatabase() AND table = 't_lwd_mutations' AND active; + +DROP TABLE IF EXISTS t_lwd_mutations; From 87dc575b47859b45e4c19abf12f03965e9ea60f8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 5 Dec 2023 15:24:29 +0100 Subject: [PATCH 022/137] Better --- tests/queries/0_stateless/02888_obsolete_settings.reference | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02888_obsolete_settings.reference b/tests/queries/0_stateless/02888_obsolete_settings.reference index 0ef6404eabb..a3988e4cd9c 100644 --- a/tests/queries/0_stateless/02888_obsolete_settings.reference +++ b/tests/queries/0_stateless/02888_obsolete_settings.reference @@ -62,7 +62,11 @@ merge_tree_enable_clear_old_broken_detached min_bytes_for_compact_part min_relative_delay_to_yield_leadership min_rows_for_compact_part +replicated_fetches_http_connection_timeout +replicated_fetches_http_receive_timeout +replicated_fetches_http_send_timeout replicated_max_parallel_fetches +replicated_max_parallel_fetches_for_host replicated_max_parallel_fetches_for_table replicated_max_parallel_sends replicated_max_parallel_sends_for_table From eb990d863df624dca621d05d14c9e8285ce7abb0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 Dec 2023 15:15:46 +0000 Subject: [PATCH 023/137] fix tests --- src/Storages/MergeTree/MutateTask.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index d9f4eeb04ef..d5222312c80 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1536,7 +1536,8 @@ private: for (auto & command_for_interpreter : ctx->for_interpreter) { - if (command_for_interpreter.type == MutationCommand::DELETE) + if (command_for_interpreter.type == MutationCommand::DELETE + || command_for_interpreter.type == MutationCommand::APPLY_DELETED_MASK) { has_delete = true; break; From b9f281f6721b73545a8cb4347d4a8af0f446dd9d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 5 Dec 2023 19:24:47 +0000 Subject: [PATCH 024/137] Docs: Remove mention of MV table engine --- .../en/engines/table-engines/special/materializedview.md | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 docs/en/engines/table-engines/special/materializedview.md diff --git 
a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md deleted file mode 100644 index d5f3b364d4e..00000000000 --- a/docs/en/engines/table-engines/special/materializedview.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -slug: /en/engines/table-engines/special/materializedview -sidebar_position: 100 -sidebar_label: MaterializedView ---- - -# MaterializedView Table Engine - -Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. From f5fb169c65a839155ce88ad9a511a88835398afa Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 6 Dec 2023 06:00:00 +0000 Subject: [PATCH 025/137] add system.dropped_tables_parts table --- .../StorageSystemDroppedTablesParts.cpp | 303 ++++++++++++++++++ .../System/StorageSystemDroppedTablesParts.h | 25 ++ src/Storages/System/attachSystemTables.cpp | 2 + 3 files changed, 330 insertions(+) create mode 100644 src/Storages/System/StorageSystemDroppedTablesParts.cpp create mode 100644 src/Storages/System/StorageSystemDroppedTablesParts.h diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp new file mode 100644 index 00000000000..10d47371e5b --- /dev/null +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -0,0 +1,303 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace +{ + +std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) +{ + switch (state) + { + case DB::DataPartRemovalState::NOT_ATTEMPTED: + return "Cleanup thread hasn't seen this part yet"; + case DB::DataPartRemovalState::VISIBLE_TO_TRANSACTIONS: + return "Part maybe visible for transactions"; + case DB::DataPartRemovalState::NON_UNIQUE_OWNERSHIP: + return "Part ownership is not unique"; + case DB::DataPartRemovalState::NOT_REACHED_REMOVAL_TIME: + return "Part hasn't reached removal time yet"; + case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT: + return "Waiting mutation parent to be removed"; + case DB::DataPartRemovalState::EMPTY_PART_COVERS_OTHER_PARTS: + return "Waiting for covered parts to be removed first"; + case DB::DataPartRemovalState::REMOVED: + return "Part was selected to be removed"; + } +} + +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +NamesAndTypesList StorageSystemDroppedTablesParts::getNamesAndTypes() +{ + NamesAndTypesList names_and_types{ + {"partition", std::make_shared()}, + {"name", std::make_shared()}, + {"uuid", std::make_shared()}, + {"part_type", std::make_shared()}, + {"active", std::make_shared()}, + {"marks", std::make_shared()}, + {"rows", std::make_shared()}, + {"bytes_on_disk", std::make_shared()}, + {"data_compressed_bytes", std::make_shared()}, + {"data_uncompressed_bytes", std::make_shared()}, + {"primary_key_size", std::make_shared()}, + {"marks_bytes", std::make_shared()}, + {"secondary_indices_compressed_bytes", std::make_shared()}, + {"secondary_indices_uncompressed_bytes", std::make_shared()}, + {"secondary_indices_marks_bytes", std::make_shared()}, + {"modification_time", std::make_shared()}, + {"remove_time", std::make_shared()}, + {"refcount", std::make_shared()}, + {"min_date", std::make_shared()}, + 
{"max_date", std::make_shared()}, + {"min_time", std::make_shared()}, + {"max_time", std::make_shared()}, + {"partition_id", std::make_shared()}, + {"min_block_number", std::make_shared()}, + {"max_block_number", std::make_shared()}, + {"level", std::make_shared()}, + {"data_version", std::make_shared()}, + {"primary_key_bytes_in_memory", std::make_shared()}, + {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"is_frozen", std::make_shared()}, + + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"engine", std::make_shared()}, + {"disk_name", std::make_shared()}, + {"path", std::make_shared()}, + + {"hash_of_all_files", std::make_shared()}, + {"hash_of_uncompressed_files", std::make_shared()}, + {"uncompressed_hash_of_compressed_files", std::make_shared()}, + + {"delete_ttl_info_min", std::make_shared()}, + {"delete_ttl_info_max", std::make_shared()}, + + {"move_ttl_info.expression", std::make_shared(std::make_shared())}, + {"move_ttl_info.min", std::make_shared(std::make_shared())}, + {"move_ttl_info.max", std::make_shared(std::make_shared())}, + + {"default_compression_codec", std::make_shared()}, + + {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + + {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + + {"projections", std::make_shared(std::make_shared())}, + + {"visible", std::make_shared()}, + {"creation_tid", getTransactionIDDataType()}, + {"removal_tid_lock", std::make_shared()}, + {"removal_tid", getTransactionIDDataType()}, + {"creation_csn", std::make_shared()}, + {"removal_csn", std::make_shared()}, + + {"has_lightweight_delete", std::make_shared()}, + + {"last_removal_attempt_time", std::make_shared()}, + {"removal_state", std::make_shared()}, + }; + return names_and_types; +} + + +void StorageSystemDroppedTablesParts::fillData(MutableColumns & columns, ContextPtr context, const SelectQueryInfo &) const +{ + auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); + + for (const auto & storage : tables_mark_dropped) + { + const auto * data = dynamic_cast(storage.table.get()); + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", storage.table->getName()); + + using State = MergeTreeData::DataPartState; + + MergeTreeData::DataPartStateVector all_parts_state; + auto all_parts = data->getDataPartsVectorForInternalUsage({State::Active, State::Outdated}, &all_parts_state); + + for (size_t part_number = 0; part_number < all_parts.size(); ++part_number) + { + const auto & part = all_parts[part_number]; + auto part_state = all_parts_state[part_number]; + + ColumnSize columns_size = part->getTotalColumnsSize(); + ColumnSize secondary_indexes_size = part->getTotalSeconaryIndicesSize(); + + size_t res_index = 0; + + { + WriteBufferFromOwnString out; + part->partition.serializeText(*data, out, FormatSettings{}); + columns[res_index++]->insert(out.str()); + } + columns[res_index++]->insert(part->name); + columns[res_index++]->insert(part->uuid); + 
columns[res_index++]->insert(part->getTypeName()); + columns[res_index++]->insert(part_state == State::Active); + + + columns[res_index++]->insert(part->getMarksCount()); + columns[res_index++]->insert(part->rows_count); + columns[res_index++]->insert(part->getBytesOnDisk()); + columns[res_index++]->insert(columns_size.data_compressed); + columns[res_index++]->insert(columns_size.data_uncompressed); + columns[res_index++]->insert(part->getIndexSizeFromFile()); + columns[res_index++]->insert(columns_size.marks); + columns[res_index++]->insert(secondary_indexes_size.data_compressed); + columns[res_index++]->insert(secondary_indexes_size.data_uncompressed); + columns[res_index++]->insert(secondary_indexes_size.marks); + columns[res_index++]->insert(static_cast(part->modification_time)); + { + time_t remove_time = part->remove_time.load(std::memory_order_relaxed); + columns[res_index++]->insert(static_cast(remove_time == std::numeric_limits::max() ? 0 : remove_time)); + } + + /// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts. + columns[res_index++]->insert(static_cast(part.use_count() - 1)); + + auto min_max_date = part->getMinMaxDate(); + auto min_max_time = part->getMinMaxTime(); + + columns[res_index++]->insert(min_max_date.first); + columns[res_index++]->insert(min_max_date.second); + columns[res_index++]->insert(static_cast(min_max_time.first)); + columns[res_index++]->insert(static_cast(min_max_time.second)); + columns[res_index++]->insert(part->info.partition_id); + columns[res_index++]->insert(part->info.min_block); + columns[res_index++]->insert(part->info.max_block); + columns[res_index++]->insert(part->info.level); + columns[res_index++]->insert(static_cast(part->info.getDataVersion())); + columns[res_index++]->insert(part->getIndexSizeInBytes()); + columns[res_index++]->insert(part->getIndexSizeInAllocatedBytes()); + columns[res_index++]->insert(part->is_frozen.load(std::memory_order_relaxed)); + + columns[res_index++]->insert(storage.table->getStorageID().getDatabaseName()); + columns[res_index++]->insert(storage.table->getStorageID().getTableName()); + columns[res_index++]->insert(storage.table->getName()); + + { + if (part->isStoredOnDisk()) + columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); + else + columns[res_index++]->insertDefault(); + } + + /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. 
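+            /// For parts in Deleting, DeleteOnDestroy or Temporary state (or parts not stored on disk) an empty path is reported instead.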
+ if (part->isStoredOnDisk() + && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) + { + columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); + } + else + columns[res_index++]->insertDefault(); + + + { + MinimalisticDataPartChecksums helper; + helper.computeTotalChecksums(part->checksums); + + columns[res_index++]->insert(getHexUIntLowercase(helper.hash_of_all_files)); + columns[res_index++]->insert(getHexUIntLowercase(helper.hash_of_uncompressed_files)); + columns[res_index++]->insert(getHexUIntLowercase(helper.uncompressed_hash_of_compressed_files)); + } + + /// delete_ttl_info + columns[res_index++]->insert(static_cast(part->ttl_infos.table_ttl.min)); + columns[res_index++]->insert(static_cast(part->ttl_infos.table_ttl.max)); + + auto add_ttl_info_map = [&](const TTLInfoMap & ttl_info_map) + { + Array expression_array; + Array min_array; + Array max_array; + + expression_array.reserve(ttl_info_map.size()); + min_array.reserve(ttl_info_map.size()); + max_array.reserve(ttl_info_map.size()); + for (const auto & [expression, ttl_info] : ttl_info_map) + { + expression_array.emplace_back(expression); + min_array.push_back(static_cast(ttl_info.min)); + max_array.push_back(static_cast(ttl_info.max)); + } + columns[res_index++]->insert(expression_array); + columns[res_index++]->insert(min_array); + columns[res_index++]->insert(max_array); + }; + + add_ttl_info_map(part->ttl_infos.moves_ttl); + + columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); + + add_ttl_info_map(part->ttl_infos.recompression_ttl); + add_ttl_info_map(part->ttl_infos.group_by_ttl); + add_ttl_info_map(part->ttl_infos.rows_where_ttl); + + Array projections; + for (const auto & [name, _] : part->getProjectionParts()) + projections.push_back(name); + + columns[res_index++]->insert(projections); + + { + auto txn = context->getCurrentTransaction(); + if (txn) + columns[res_index++]->insert(part->version.isVisible(*txn)); + else + columns[res_index++]->insert(part_state == State::Active); + } + + auto get_tid_as_field = [](const TransactionID & tid) -> Field + { + return Tuple{tid.start_csn, tid.local_tid, tid.host_id}; + }; + + columns[res_index++]->insert(get_tid_as_field(part->version.creation_tid)); + columns[res_index++]->insert(part->version.removal_tid_lock.load(std::memory_order_relaxed)); + columns[res_index++]->insert(get_tid_as_field(part->version.getRemovalTID())); + columns[res_index++]->insert(part->version.creation_csn.load(std::memory_order_relaxed)); + columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); + columns[res_index++]->insert(part->hasLightweightDelete()); + columns[res_index++]->insert(static_cast(part->last_removal_attempt_time.load(std::memory_order_relaxed))); + columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); + + } + + } + +} + + +} diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h new file mode 100644 index 00000000000..652d552007a --- /dev/null +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -0,0 +1,25 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + + +/** Implements system table 'dropped_tables_parts' which allows to get information about data parts for dropped but not yet removed tables. 
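+  * For example: SELECT database, table, name, rows FROM system.dropped_tables_parts;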
+ */ +class StorageSystemDroppedTablesParts final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemDroppedTablesParts"; } + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + void fillData(MutableColumns & columns, ContextPtr context, const SelectQueryInfo &) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ca49a546b94..eb6cacfe50e 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "backups"); attach(context, system_database, "schema_inference_cache"); attach(context, system_database, "dropped_tables"); + attach(context, system_database, "dropped_tables_parts"); attach(context, system_database, "scheduler"); #if defined(__ELF__) && !defined(OS_FREEBSD) attach(context, system_database, "symbols"); From 1276c3ab80645d31cd93ffd13af8d3213c7a2a03 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Dec 2023 08:47:59 +0000 Subject: [PATCH 026/137] Try to fix broken links --- docs/en/engines/table-engines/index.md | 1 - .../mergetree-family/custom-partitioning-key.md | 2 +- docs/en/operations/system-tables/tables.md | 2 +- docs/en/sql-reference/statements/optimize.md | 2 +- docs/en/sql-reference/statements/select/order-by.md | 5 +++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index b024820024a..5e81eacc937 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -67,7 +67,6 @@ Engines in the family: Engines in the family: - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) - [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) - [Merge](../../engines/table-engines/special/merge.md#merge) - [File](../../engines/table-engines/special/file.md#file) diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 97d37e476ae..23d98d4b20e 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -12,7 +12,7 @@ In most cases you do not need a partition key, and in most other cases you do no You should never use too granular of partitioning. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. ::: -Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. 
+Partitioning is available for the [MergeTree family tables](../../../engines/table-engines/mergetree-family/mergetree.md), including [replicated tables](../../../engines/table-engines/mergetree-family/replication.md) and [materialized views](../../../sql-reference/statements/create/view.md#materialized-view). A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. Partitions improve performance for queries containing a partitioning key because ClickHouse will filter for that partition before selecting the parts and granules within the partition. diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index e4461e14236..01558f4fbd9 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -29,7 +29,7 @@ Columns: - `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. -- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). +- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([materialized views](../../sql-reference/statements/create/view.md#materialized-view) based on the current table). - `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 07b5a196096..b5fc0a23745 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -17,7 +17,7 @@ This query tries to initialize an unscheduled merge of data parts for tables. No OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] ``` -The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. +The `OPTIMIZE` query is supported for the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family (including [materialized views](../../sql-reference/statements/create/view.md#materialized-view)) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`). 
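As a quick illustration of how the partitioning and OPTIMIZE wording above fits together, here is a minimal sketch against a hypothetical table (the names t_partitioned and mv_partitioned are examples, not part of these patches):

CREATE TABLE t_partitioned (d Date, id UInt64, v UInt64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY id;

CREATE MATERIALIZED VIEW mv_partitioned ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY d AS SELECT d, id, v FROM t_partitioned;

-- Merge the parts of a single partition; FINAL forces the merge even if the data is already in one part.
OPTIMIZE TABLE t_partitioned PARTITION 202312 FINAL;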
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 53bdc9041a1..b96ebff265d 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -265,8 +265,9 @@ Consider disabling `optimize_read_in_order` manually, when running queries that Optimization is supported in the following table engines: -- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) -- [Merge](../../../engines/table-engines/special/merge.md), [Buffer](../../../engines/table-engines/special/buffer.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) table engines over `MergeTree`-engine tables +- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (including [materialized views](../../../sql-reference/statements/create/view.md#materialized-view)), +- [Merge](../../../engines/table-engines/special/merge.md), +- [Buffer](../../../engines/table-engines/special/buffer.md) In `MaterializedView`-engine tables the optimization works with views like `SELECT ... FROM merge_tree_table ORDER BY pk`. But it is not supported in the queries like `SELECT ... FROM view ORDER BY pk` if the view query does not have the `ORDER BY` clause. From 8f00678abea1f02724025d23ca1fcf8be6b2f4c0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 6 Dec 2023 13:24:25 +0000 Subject: [PATCH 027/137] fix style --- src/Storages/System/StorageSystemDroppedTablesParts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index 10d47371e5b..1770610c7df 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -294,7 +294,7 @@ void StorageSystemDroppedTablesParts::fillData(MutableColumns & columns, Context columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); } - + } } From cb2bc710bc9560a07652a895b1c6644d46ad85fe Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Thu, 7 Dec 2023 21:23:23 +0800 Subject: [PATCH 028/137] fix --- .../0_stateless/00753_system_columns_and_system_tables_long.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index fd9ba586580..c21f5c12b2f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -68,8 +68,6 @@ FORMAT PrettyCompactNoEscapes; DROP TABLE IF EXISTS check_system_tables; - - SELECT 'Check total_bytes/total_rows for TinyLog'; CREATE TABLE check_system_tables (key UInt8) ENGINE = TinyLog(); SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase(); From 89d4e639673505231c82776fea2e202c49b90940 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 7 Dec 2023 16:17:10 +0000 Subject: [PATCH 029/137] fix some tests --- ...731_long_merge_tree_select_opened_files.sh | 2 +- .../00877_memory_limit_for_new_delete.sql | 1 + .../02096_totals_global_in_bug.sql | 3 +- .../0_stateless/02163_shard_num.reference | 12 +- tests/queries/0_stateless/02163_shard_num.sql | 10 +- .../02480_max_map_null_totals.reference | 96 +++--- 
.../0_stateless/02480_max_map_null_totals.sql | 54 ++-- .../02496_remove_redundant_sorting.reference | 68 ++-- .../02496_remove_redundant_sorting.sh | 12 +- ...emove_redundant_sorting_analyzer.reference | 76 +++-- .../02500_remove_redundant_distinct.reference | 290 +++++++++-------- .../02500_remove_redundant_distinct.sh | 26 +- ...move_redundant_distinct_analyzer.reference | 294 ++++++++++-------- 13 files changed, 529 insertions(+), 415 deletions(-) diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 11396dd34eb..1bb4dbd34de 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -settings="--log_queries=1 --log_query_threads=1 --log_profile_events=1 --log_query_settings=1 --allow_deprecated_syntax_for_merge_tree=1" +settings="--log_queries=1 --log_query_threads=1 --log_profile_events=1 --log_query_settings=1 --allow_deprecated_syntax_for_merge_tree=1 --max_bytes_before_external_group_by 0 --max_bytes_before_external_sort 0" # Test insert logging on each block and checkPacket() method diff --git a/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql b/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql index 3864293751f..8eb9d83b730 100644 --- a/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql +++ b/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql @@ -2,6 +2,7 @@ -- Tag no-msan: memory limits don't work correctly under msan because it replaces malloc/free SET max_memory_usage = 1000000000; +SET max_bytes_before_external_group_by = 0; SELECT sum(ignore(*)) FROM ( SELECT number, argMax(number, (number, toFixedString(toString(number), 1024))) diff --git a/tests/queries/0_stateless/02096_totals_global_in_bug.sql b/tests/queries/0_stateless/02096_totals_global_in_bug.sql index ac4f2b9d2ba..27ca26cf141 100644 --- a/tests/queries/0_stateless/02096_totals_global_in_bug.sql +++ b/tests/queries/0_stateless/02096_totals_global_in_bug.sql @@ -1,2 +1 @@ -select sum(number) from remote('127.0.0.{2,3}', numbers(2)) where number global in (select sum(number) from numbers(2) group by number with totals) group by number with totals - +select sum(number) from remote('127.0.0.{2,3}', numbers(2)) where number global in (select sum(number) from numbers(2) group by number with totals) group by number with totals order by number; diff --git a/tests/queries/0_stateless/02163_shard_num.reference b/tests/queries/0_stateless/02163_shard_num.reference index 77eea7c95b9..d79b95024f6 100644 --- a/tests/queries/0_stateless/02163_shard_num.reference +++ b/tests/queries/0_stateless/02163_shard_num.reference @@ -1,18 +1,18 @@ -- { echoOn } -SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num; -2 1 +SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num ORDER BY _shard_num; 1 1 -SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num; 2 1 +SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num ORDER BY shard_num; 1 1 -SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num; 2 1 
+SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num ORDER BY _shard_num; 1 1 -SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num; 2 1 +SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num ORDER BY shard_num; 1 1 -SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num; 2 1 +SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num ORDER BY shard_num; 1 1 +2 1 SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNSUPPORTED_METHOD, UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/02163_shard_num.sql b/tests/queries/0_stateless/02163_shard_num.sql index cc87140ebaf..d3b4a95c6a8 100644 --- a/tests/queries/0_stateless/02163_shard_num.sql +++ b/tests/queries/0_stateless/02163_shard_num.sql @@ -1,10 +1,10 @@ -- { echoOn } -SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num; -SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num; -SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num; -SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num; -SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num; +SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num ORDER BY _shard_num; +SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num ORDER BY shard_num; +SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num ORDER BY _shard_num; +SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num ORDER BY shard_num; +SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num ORDER BY shard_num; SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNSUPPORTED_METHOD, UNKNOWN_IDENTIFIER } -- { echoOff } diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.reference b/tests/queries/0_stateless/02480_max_map_null_totals.reference index 5cc9b5a495f..2fb87a76157 100644 --- a/tests/queries/0_stateless/02480_max_map_null_totals.reference +++ b/tests/queries/0_stateless/02480_max_map_null_totals.reference @@ -1,119 +1,119 @@ ([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([0],[0]) -([2],[2]) ([1],[1]) +([2],[2]) ([0,2],[0,2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) - 
([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([0],[0]) -([2],[2]) ([1],[1]) +([2],[2]) ([0,2],[0,2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) - ([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) ([-1,0,1,2],[0,0,0,2]) -([-1,0],[0,0]) -([1,2],[0,2]) ([0,1],[0,1]) +([1,2],[0,2]) +([-1,0],[0,0]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0,1,2],[0,0,0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([-1,0],[0,0]) -([1,2],[0,2]) -([0,1],[0,1]) ([-1,0,1,2],[0,0,0,2]) +([0,1],[0,1]) +([1,2],[0,2]) ([0],[0]) -([2],[2]) ([1],[1]) +([2],[2]) ([0,2],[0,2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) ([0],[0]) -([2],[2]) -([1],[1]) ([0,2],[0,2]) +([1],[1]) +([2],[2]) diff --git a/tests/queries/0_stateless/02480_max_map_null_totals.sql b/tests/queries/0_stateless/02480_max_map_null_totals.sql index 81e2a5c4243..be2c566ddc1 100644 --- a/tests/queries/0_stateless/02480_max_map_null_totals.sql +++ b/tests/queries/0_stateless/02480_max_map_null_totals.sql @@ -1,39 +1,39 @@ -SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT sumMap([number % 3, number % 
4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; SELECT '-'; -SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number :: Float64, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; SELECT '-'; -SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT maxMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY 
number WITH ROLLUP; -SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT minMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; -SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS; -SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP; -SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH TOTALS ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH ROLLUP ORDER BY number; +SELECT sumMap([number % 3, number % 4 - 1], [number :: UInt256, NULL]) FROM numbers(3) GROUP BY number WITH CUBE ORDER BY number; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index b318157835d..b38cf176008 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -113,27 +113,26 @@ FROM ) ORDER BY number DESC ) AS t2 +ORDER BY number -- explain -Expression ((Projection + Before ORDER BY)) - Join (JOIN FillRightFirst) - Expression ((Before JOIN + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Join (JOIN FillRightFirst) + Expression ((Before JOIN + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + Projection))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Expression ((Joined actions + (Rename joined columns + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) ReadFromSystemNumbers -- execute -0 2 -0 1 0 0 -1 2 -1 1 +0 1 +0 2 1 0 -2 2 -2 1 +1 1 +1 2 2 0 +2 1 +2 2 -- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries -- query SELECT * @@ -193,15 +192,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Projection + Before ORDER BY)) - Aggregating - Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery -- query SELECT any(number) @@ -217,15 +219,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Projection + Before ORDER BY)) - Aggregating - Expression ((Before GROUP BY + 
(Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- query with aggregation function but w/o GROUP BY -> remove sorting -- query SELECT sum(number) @@ -315,15 +320,18 @@ FROM GROUP BY number ) WHERE a > 0 +ORDER BY a -- explain -Expression ((Projection + (Before ORDER BY + ))) - Aggregating - Filter - Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + )) + Aggregating + Filter + Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + ReadFromSystemNumbers -- execute -2 1 +2 -- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps -- query SELECT * diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 8b529c26d93..d35892432a5 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -96,7 +96,8 @@ FROM ORDER BY number ASC ) ORDER BY number DESC -) AS t2" +) AS t2 +ORDER BY number" run_query "$query" echo "-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries" @@ -138,7 +139,8 @@ FROM ) ORDER BY number DESC ) -GROUP BY number" +GROUP BY number +ORDER BY number" run_query "$query" echo "-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery" @@ -154,7 +156,8 @@ FROM ) ORDER BY number DESC ) -GROUP BY number" +GROUP BY number +ORDER BY number" run_query "$query" echo "-- query with aggregation function but w/o GROUP BY -> remove sorting" @@ -218,7 +221,8 @@ FROM ) GROUP BY number ) -WHERE a > 0" +WHERE a > 0 +ORDER BY a" run_query "$query" echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. 
But need to correctly update data streams sorting properties after removing sorting steps" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index ee2099c62ba..b2ac9e4533b 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -113,27 +113,26 @@ FROM ) ORDER BY number DESC ) AS t2 +ORDER BY number -- explain -Expression ((Project names + (Projection + DROP unused columns after JOIN))) - Join (JOIN FillRightFirst) - Expression ((Change column names to column identifiers + Project names)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + DROP unused columns after JOIN))) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) ReadFromSystemNumbers - Expression ((Change column names to column identifiers + Project names)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) ReadFromSystemNumbers -- execute -0 2 -0 1 0 0 -1 2 -1 1 +0 1 +0 2 1 0 -2 2 -2 1 +1 1 +1 2 2 0 +2 1 +2 2 -- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries -- query SELECT * @@ -193,15 +192,18 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Project names + Projection)) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery -- query SELECT any(number) @@ -217,17 +219,20 @@ FROM ORDER BY number DESC ) GROUP BY number +ORDER BY number -- explain -Expression ((Project names + Projection)) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + 
(Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- query with aggregation function but w/o GROUP BY -> remove sorting -- query SELECT sum(number) @@ -319,17 +324,20 @@ FROM GROUP BY number ) WHERE a > 0 +ORDER BY a -- explain -Expression ((Project names + Projection)) - Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection)))) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection)))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers -- execute -2 1 +2 -- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. 
But need to correctly update data streams sorting properties after removing sorting steps -- query SELECT * diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 3f580763dba..5348d407097 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -73,22 +73,24 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y +ORDER BY n -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - Join (JOIN FillRightFirst) - Expression ((Before JOIN + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + Projection))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + Join (JOIN FillRightFirst) + Expression ((Before JOIN + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + Projection))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 0 0 1 @@ -106,12 +108,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Projection + (Before ORDER BY + (Projection + (Before ORDER BY + Projection))))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + Projection)))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 0 1 2 @@ -128,12 +133,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Projection + (Before ORDER BY + (Projection + (Before ORDER BY + Projection))))) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers +Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + Projection)))) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 2 0 0 2 1 2 @@ -147,17 +155,19 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr +ORDER BY arr -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ArrayJoin (ARRAY JOIN) - Expression ((Before ARRAY JOIN + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromStorage (Values) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ArrayJoin (ARRAY JOIN) + Expression ((Before ARRAY JOIN + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (Values) -- execute Hello World @@ -194,16 +204,18 @@ FROM SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities -- explain Expression (( + Projection)) Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - Filter ((WHERE + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - 
Expression (Before ORDER BY) - ReadFromStorage (SystemOne) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + Filter ((WHERE + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemOne) -- execute ['Istanbul','Berlin','Bensheim'] -- GROUP BY before DISTINCT with on the same columns => remove DISTINCT @@ -222,20 +234,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -252,19 +267,22 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 -- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT @@ -283,20 +301,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Rollup + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 36 @@ -316,22 +337,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - 
Rollup - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Rollup + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -348,20 +372,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Cube - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Cube + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 36 @@ -381,22 +408,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - Cube - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Cube + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -413,20 +443,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain Expression (Projection) Distinct Distinct (Preliminary DISTINCT) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers + Expression ((Before ORDER BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + 
Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 12 @@ -447,21 +480,24 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain -Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Join (JOIN FillRightFirst) - Expression (Before JOIN) - ReadFromSystemNumbers - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromSystemNumbers +Expression ((Projection + (Before ORDER BY + Projection))) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Join (JOIN FillRightFirst) + Expression (Before JOIN) + ReadFromSystemNumbers + Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) + ReadFromSystemNumbers -- execute 0 -2 1 +2 0 -- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT @@ -488,21 +524,23 @@ FROM SELECT DISTINCT number FROM numbers(2) ) +ORDER BY number -- explain Expression (Projection) Distinct - Distinct (Preliminary DISTINCT) - Union - Expression ((Before ORDER BY + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers - Expression (( + Projection)) - Distinct - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Union + Expression ((Before ORDER BY + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers + Expression (( + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromSystemNumbers -- execute 0 1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index f07cdca4b5a..f83fcff07c1 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -59,7 +59,8 @@ FROM ( SELECT DISTINCT number AS n FROM numbers(2) -) as y" +) as y +ORDER BY n" run_query "$query" echo "-- DISTINCT duplicates with several columns" @@ -72,7 +73,8 @@ FROM SELECT DISTINCT number as a, 2*number as b FROM numbers(3) ) -)" +) +ORDER BY a, b" run_query "$query" echo "-- DISTINCT duplicates with constant columns" @@ -85,7 +87,8 @@ FROM SELECT DISTINCT 1, number as a, 2*number as b FROM numbers(3) ) -)" +) +ORDER BY a, b" run_query "$query" echo "-- ARRAY JOIN: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs" @@ -95,7 +98,8 @@ FROM SELECT DISTINCT * FROM VALUES('Hello', 'World', 'Goodbye') ) AS words -ARRAY JOIN [0, 1] AS arr" +ARRAY JOIN [0, 1] AS arr +ORDER BY arr" run_query "$query" echo "-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs" @@ -114,7 +118,8 @@ FROM ( SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) -WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim']" +WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities" run_query "$query" echo "-- GROUP BY before DISTINCT with on the same columns => remove DISTINCT" @@ -132,6 +137,7 @@ FROM FROM 
numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a )" run_query "$query" @@ -150,6 +156,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a )" run_query "$query" @@ -168,6 +175,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a )" run_query "$query" @@ -186,6 +194,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a )" run_query "$query" @@ -204,6 +213,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a )" run_query "$query" @@ -222,6 +232,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a )" run_query "$query" @@ -240,6 +251,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a )" run_query "$query" @@ -258,6 +270,7 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a )" run_query "$query" @@ -274,5 +287,6 @@ FROM UNION ALL SELECT DISTINCT number FROM numbers(2) -)" +) +ORDER BY number" run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index a5f2c3e5ca3..798191db7e4 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -74,22 +74,25 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y +ORDER BY n -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + DROP unused columns after JOIN)) - Join (JOIN FillRightFirst) - Expression ((Change column names to column identifiers + Project names)) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers - Expression ((Change column names to column identifiers + Project names)) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + DROP unused columns after JOIN)) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 0 0 1 @@ -107,12 +110,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + 
Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 0 1 2 @@ -129,12 +135,15 @@ FROM FROM numbers(3) ) ) +ORDER BY a, b -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 2 0 0 2 1 2 @@ -148,17 +157,20 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr +ORDER BY arr -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression (Projection) - ArrayJoin (ARRAY JOIN) - Expression ((DROP unused columns before ARRAY JOIN + (ARRAY JOIN actions + (Change column names to column identifiers + Project names)))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromStorage (Values) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Projection) + ArrayJoin (ARRAY JOIN) + Expression ((DROP unused columns before ARRAY JOIN + (ARRAY JOIN actions + (Change column names to column identifiers + Project names)))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (Values) -- execute Hello World @@ -196,16 +208,19 @@ FROM SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities ) WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +ORDER BY cities -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression (Projection) - Filter ((WHERE + (Change column names to column identifiers + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromStorage (SystemOne) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Projection) + Filter ((WHERE + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemOne) -- execute ['Istanbul','Berlin','Bensheim'] -- GROUP BY before DISTINCT with on the same columns => remove DISTINCT @@ -224,20 +239,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers 
+Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 1 +2 -- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -254,19 +272,22 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 -- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT @@ -285,20 +306,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 36 @@ -318,22 +342,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH ROLLUP + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - Rollup - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - 
Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -350,20 +377,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - Cube - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 36 @@ -383,22 +413,25 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH CUBE + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - Cube - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 -1 0 +1 +2 -- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT -- query SELECT DISTINCT c @@ -415,20 +448,23 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain Expression (Project names) Distinct (DISTINCT) Distinct 
(Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers + Expression ((Projection + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 12 @@ -449,21 +485,24 @@ FROM FROM numbers(3) AS x, numbers(3, 3) AS y ) GROUP BY a WITH TOTALS + ORDER BY a ) -- explain -Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) - TotalsHaving - Aggregating - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) - Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) - ReadFromSystemNumbers - Expression (Change column names to column identifiers) - ReadFromSystemNumbers +Expression ((Project names + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + Projection)) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromSystemNumbers + Expression (Change column names to column identifiers) + ReadFromSystemNumbers -- execute 0 -2 1 +2 0 -- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT @@ -490,21 +529,24 @@ FROM SELECT DISTINCT number FROM numbers(2) ) +ORDER BY number -- explain Expression (Project names) Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Union - Expression ((Projection + (Change column names to column identifiers + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers - Expression (( + ( + Project names))) - Distinct (DISTINCT) - Distinct (Preliminary DISTINCT) - Expression ((Projection + Change column names to column identifiers)) - ReadFromSystemNumbers + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromSystemNumbers -- execute 0 1 From e5f849bdae77eeb8e4521553516f4e39a2faf4cd Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 
8 Dec 2023 15:26:17 +0000 Subject: [PATCH 030/137] fix some tests --- tests/clickhouse-test | 4 ++-- tests/queries/0_stateless/00155_long_merges.sh | 14 +++++++++++--- tests/queries/0_stateless/00947_ml_test.sql | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2ca22577083..8d813fbb904 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -608,10 +608,10 @@ class SettingsRandomizer: "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), "max_bytes_before_external_sort": threshold_generator( - 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 + 1.0, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_external_group_by": threshold_generator( - 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 + 1.0, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), diff --git a/tests/queries/0_stateless/00155_long_merges.sh b/tests/queries/0_stateless/00155_long_merges.sh index 9ed0f2c6de1..8ecca0aeb42 100755 --- a/tests/queries/0_stateless/00155_long_merges.sh +++ b/tests/queries/0_stateless/00155_long_merges.sh @@ -34,32 +34,40 @@ function test { SETTINGS="--min_insert_block_size_rows=0 --min_insert_block_size_bytes=0 --max_block_size=65505" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES summing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO summing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO summing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $2" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES collapsing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO collapsing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO collapsing_00155 (x) SELECT number AS x FROM system.numbers LIMIT $2" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES aggregating_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO aggregating_00155 (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $1) GROUP BY number" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO aggregating_00155 (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $2) GROUP BY number" + $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES replacing_00155" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO replacing_00155 (x, v) SELECT number AS x, toUInt64(number % 3 == 0) FROM system.numbers LIMIT $1" $CLICKHOUSE_CLIENT $SETTINGS --query="INSERT INTO replacing_00155 (x, v) SELECT number AS x, toUInt64(number % 3 == 1) FROM system.numbers LIMIT $2" $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM summing_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES summing_00155" $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE summing_00155" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $SUM FROM summing_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM collapsing_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE collapsing_00155" --server_logs_file='/dev/null'; + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES collapsing_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE collapsing_00155 FINAL" --server_logs_file='/dev/null'; 
$CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $MAX FROM collapsing_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sumMerge(s) = $SUM FROM aggregating_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE aggregating_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES aggregating_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE aggregating_00155 FINAL" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sumMerge(s) = $SUM FROM aggregating_00155" echo $CLICKHOUSE_CLIENT --query="SELECT count() = $SUM, sum(s) = $SUM FROM replacing_00155" - $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE replacing_00155" + $CLICKHOUSE_CLIENT --query="SYSTEM START MERGES replacing_00155" + $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE replacing_00155 FINAL" $CLICKHOUSE_CLIENT --query="SELECT count() = $MAX, sum(s) = $MAX FROM replacing_00155" $CLICKHOUSE_CLIENT --query="SELECT count() = sum(v) FROM replacing_00155 where x % 3 == 0 and x < $1" $CLICKHOUSE_CLIENT --query="SELECT count() = sum(v) FROM replacing_00155 where x % 3 == 1 and x < $2" diff --git a/tests/queries/0_stateless/00947_ml_test.sql b/tests/queries/0_stateless/00947_ml_test.sql index 94e4f3b4626..72000103a44 100644 --- a/tests/queries/0_stateless/00947_ml_test.sql +++ b/tests/queries/0_stateless/00947_ml_test.sql @@ -40,10 +40,10 @@ INSERT INTO grouptest VALUES (1, 1.732, 3.653, 11.422), (1, 2.150, 2.103, 7.609), (1, 0.061, 3.310, 7.052), (1, 1.030, 3.671, 10.075), (1, 1.879, 0.578, 2.492), (1, 0.922, 2.552, 6.499), (1, 1.145, -0.095, -0.993), (1, 1.920, 0.373, 1.959), (1, 0.458, 0.094, -1.801), (1, -0.118, 3.273, 6.582), (1, 2.667, 1.472, 6.752), (1, -0.387, -0.529, -5.360), (1, 2.219, 1.790, 6.810), (1, -0.754, 2.139, 1.908), (1, -0.446, -0.668, -5.896), (1, 1.729, 0.914, 3.199), (1, 2.908, -0.420, 1.556), (1, 1.645, 3.581, 11.034), (1, 0.358, -0.950, -5.136), (1, -0.467, 2.339, 3.084), (1, 3.629, 2.959, 13.135), (1, 2.393, 0.926, 4.563), (1, -0.945, 0.281, -4.047), (1, 3.688, -0.570, 2.667), (1, 3.016, 1.775, 8.356), (1, 2.571, 0.139, 2.559), (1, 2.999, 0.956, 5.866), (1, 1.754, -0.809, -1.920), (1, 3.943, 0.382, 6.030), (1, -0.970, 2.315, 2.004), (1, 1.503, 0.790, 2.376), (1, -0.775, 2.563, 3.139), (1, 1.211, 0.113, -0.240), (1, 3.058, 0.977, 6.048), (1, 2.729, 1.634, 7.360), (1, 0.307, 2.759, 5.893), (1, 3.272, 0.181, 4.089), (1, 1.192, 1.963, 5.273), (1, 0.931, 1.447, 3.203), (1, 3.835, 3.447, 15.011), (1, 0.709, 0.008, -1.559), (1, 3.155, -0.676, 1.283), (1, 2.342, 1.047, 4.824), (1, 2.059, 1.262, 4.903), (1, 2.797, 0.855, 5.159), (1, 0.387, 0.645, -0.292), (1, 1.418, 0.408, 1.060), (1, 2.719, -0.826, -0.039), (1, 2.735, 3.736, 13.678), (1, 0.205, 0.777, -0.260), (1, 3.117, 2.063, 9.424), (1, 0.601, 0.178, -1.263), (1, 0.064, 0.157, -2.401), (1, 3.104, -0.455, 1.842), (1, -0.253, 0.672, -1.490), (1, 2.592, -0.408, 0.961), (1, -0.909, 1.314, -0.878), (1, 0.625, 2.594, 6.031), (1, 2.749, -0.210, 1.869), (1, -0.469, 1.532, 0.657), (1, 1.954, 1.827, 6.388), (1, -0.528, 1.136, -0.647), (1, 0.802, -0.583, -3.146), (1, -0.176, 1.584, 1.400), (1, -0.705, -0.785, -6.766), (1, 1.660, 2.365, 7.416), (1, 2.278, 3.977, 13.485), (1, 2.846, 3.845, 14.229), (1, 3.588, -0.401, 2.974), (1, 3.525, 3.831, 15.542), (1, 0.191, 3.312, 7.318), (1, 2.615, -0.287, 1.370), (1, 2.701, -0.446, 1.064), (1, 2.065, -0.556, -0.538), (1, 2.572, 3.618, 12.997), (1, 3.743, -0.708, 2.362), (1, 3.734, 2.319, 11.425), (1, 3.768, 2.777, 12.866), (1, 3.203, 0.958, 6.280), (1, 1.512, 2.635, 7.927), (1, 2.194, 2.323, 8.356), (1, -0.726, 2.729, 3.735), 
(1, 0.020, 1.704, 2.152), (1, 2.173, 2.856, 9.912), (1, 3.124, 1.705, 8.364), (1, -0.834, 2.142, 1.759), (1, -0.702, 3.024, 4.666), (1, 1.393, 0.583, 1.535), (1, 2.136, 3.770, 12.581), (1, -0.445, 0.991, -0.917), (1, 0.244, -0.835, -5.016), (1, 2.789, 0.691, 4.652), (1, 0.246, 2.661, 5.475), (1, 3.793, 2.671, 12.601), (1, 1.645, -0.973, -2.627), (1, 2.405, 1.842, 7.336), (1, 3.221, 3.109, 12.769), (1, -0.638, 3.220, 5.385), (1, 1.836, 3.025, 9.748), (1, -0.660, 1.818, 1.133), (1, 0.901, 0.981, 1.744), (1, -0.236, 3.087, 5.789), (1, 1.744, 3.864, 12.078), (1, -0.166, 3.186, 6.226), (1, 3.536, -0.090, 3.803), (1, 3.284, 2.026, 9.648), (1, 1.327, 2.822, 8.119), (1, -0.709, 0.105, -4.104), (1, 0.509, -0.989, -4.949), (1, 0.180, -0.934, -5.440), (1, 3.522, 1.374, 8.168), (1, 1.497, -0.764, -2.297), (1, 1.696, 2.364, 7.482), (1, -0.202, -0.032, -3.500), (1, 3.109, -0.138, 2.804), (1, -0.238, 2.992, 5.501), (1, 1.639, 1.634, 5.181), (1, 1.919, 0.341, 1.859), (1, -0.563, 1.750, 1.124), (1, 0.886, 3.589, 9.539), (1, 3.619, 3.020, 13.299), (1, 1.703, -0.493, -1.073), (1, 2.364, 3.764, 13.022), (1, 1.820, 1.854, 6.201), (1, 1.437, -0.765, -2.421), (1, 1.396, 0.959, 2.668), (1, 2.608, 2.032, 8.312), (1, 0.333, -0.040, -2.455), (1, 3.441, 0.824, 6.355), (1, 1.303, 2.767, 7.908), (1, 1.359, 2.404, 6.932), (1, 0.674, 0.241, -0.930), (1, 2.708, -0.077, 2.183), (1, 3.821, 3.215, 14.287), (1, 3.316, 1.591, 8.404), (1, -0.848, 1.145, -1.259), (1, 3.455, 3.081, 13.153), (1, 2.568, 0.259, 2.914), (1, 2.866, 2.636, 10.642), (1, 2.776, -0.309, 1.626), (1, 2.087, 0.619, 3.031), (1, 1.682, 1.201, 3.967), (1, 3.800, 2.600, 12.399), (1, 3.344, -0.780, 1.347), (1, 1.053, -0.817, -3.346), (1, 0.805, 3.085, 7.865), (1, 0.173, 0.069, -2.449), (1, 2.018, 1.309, 4.964), (1, 3.713, 3.804, 15.838), (1, 3.805, -0.063, 4.421), (1, 3.587, 2.854, 12.738), (1, 2.426, -0.179, 1.315), (1, 0.535, 0.572, -0.213), (1, -0.558, 0.142, -3.690), (1, -0.875, 2.700, 3.349), (1, 2.405, 3.933, 13.610), (1, 1.633, 1.222, 3.934), (1, 0.049, 2.853, 5.657), (1, 1.146, 0.907, 2.015), (1, 0.300, 0.219, -1.744), (1, 2.226, 2.526, 9.029), (1, 2.545, -0.762, -0.198), (1, 2.553, 3.956, 13.974), (1, -0.898, 2.836, 3.713), (1, 3.796, -0.202, 3.985), (1, -0.810, 2.963, 4.268), (1, 0.511, 2.104, 4.334), (1, 3.527, 3.741, 15.275), (1, -0.921, 3.094, 4.440), (1, 0.856, 3.108, 8.036), (1, 0.815, 0.565, 0.323), (1, 3.717, 0.693, 6.512), (1, 3.052, 3.558, 13.778), (1, 2.942, 3.034, 11.986), (1, 0.765, 3.177, 8.061), (1, 3.175, -0.525, 1.776), (1, 0.309, 1.006, 0.638), (1, 1.922, 0.835, 3.349), (1, 3.678, 3.314, 14.297), (1, 2.840, -0.486, 1.221), (1, 1.195, 3.396, 9.578), (1, -0.157, 3.122, 6.053), (1, 2.404, 1.434, 6.110), (1, 3.108, 2.210, 9.845), (1, 2.289, 1.188, 5.142), (1, -0.319, -0.044, -3.769), (1, -0.625, 3.701, 6.854), (1, 2.269, -0.276, 0.710), (1, 0.777, 1.963, 4.442), (1, 0.411, 1.893, 3.501), (1, 1.173, 0.461, 0.728), (1, 1.767, 3.077, 9.765), (1, 0.853, 3.076, 7.933), (1, -0.013, 3.149, 6.421), (1, 3.841, 1.526, 9.260), (1, -0.950, 0.277, -4.070), (1, -0.644, -0.747, -6.527), (1, -0.923, 1.733, 0.353), (1, 0.044, 3.037, 6.201), (1, 2.074, 2.494, 8.631), (1, 0.016, 0.961, -0.085), (1, -0.780, -0.448, -5.904), (1, 0.170, 1.936, 3.148), (1, -0.420, 3.730, 7.349), (1, -0.630, 1.504, 0.254), (1, -0.006, 0.045, -2.879), (1, 1.101, -0.985, -3.753), (1, 1.618, 0.555, 1.900), (1, -0.336, 1.408, 0.552), (1, 1.086, 3.284, 9.024), (1, -0.815, 2.032, 1.466), (1, 3.144, -0.380, 2.148), (1, 2.326, 2.077, 7.883), (1, -0.571, 0.964, -1.251), (1, 2.416, 1.255, 
5.595), (1, 3.964, 1.379, 9.065), (1, 3.897, 1.553, 9.455), (1, 1.806, 2.667, 8.611), (1, 0.323, 3.809, 9.073), (1, 0.501, 3.256, 7.769), (1, -0.679, 3.539, 6.259), (1, 2.825, 3.856, 14.219), (1, 0.288, -0.536, -4.032), (1, 3.009, 0.725, 5.193), (1, -0.763, 1.140, -1.105), (1, 1.124, 3.807, 10.670), (1, 2.478, 0.204, 2.570), (1, 2.825, 2.639, 10.566), (1, 1.878, -0.883, -1.892), (1, 3.380, 2.942, 12.587), (1, 2.202, 1.739, 6.621), (1, -0.711, -0.680, -6.463), (1, -0.266, 1.827, 1.951), (1, -0.846, 1.003, -1.683), (1, 3.201, 0.132, 3.798), (1, 2.797, 0.085, 2.849), (1, 1.632, 3.269, 10.072), (1, 2.410, 2.727, 10.003), (1, -0.624, 0.853, -1.690), (1, 1.314, 3.268, 9.433), (1, -0.395, 0.450, -2.440), (1, 0.992, 3.168, 8.489), (1, 3.355, 2.106, 10.028), (1, 0.509, -0.888, -4.647), (1, 1.007, 0.797, 1.405), (1, 0.045, 0.211, -2.278), (1, -0.911, 1.093, -1.544), (1, 2.409, 0.273, 2.637), (1, 2.640, 3.540, 12.899), (1, 2.668, -0.433, 1.038), (1, -0.014, 0.341, -2.005), (1, -0.525, -0.344, -5.083), (1, 2.278, 3.517, 12.105), (1, 3.712, 0.901, 7.128), (1, -0.689, 2.842, 4.149), (1, -0.467, 1.263, -0.147), (1, 0.963, -0.653, -3.034), (1, 2.559, 2.590, 9.889), (1, 1.566, 1.393, 4.312), (1, -1.000, 1.809, 0.429), (1, -0.297, 3.221, 6.070), (1, 2.199, 3.820, 12.856), (1, 3.096, 3.251, 12.944), (1, 1.479, 1.835, 5.461), (1, 0.276, 0.773, -0.130), (1, 0.607, 1.382, 2.360), (1, 1.169, -0.108, -0.985), (1, 3.429, 0.475, 5.282), (1, 2.626, 0.104, 2.563), (1, 1.156, 3.512, 9.850), (1, 3.947, 0.796, 7.282), (1, -0.462, 2.425, 3.351), (1, 3.957, 0.366, 6.014), (1, 3.763, -0.330, 3.536), (1, 0.667, 3.361, 8.417), (1, -0.583, 0.892, -1.492), (1, -0.505, 1.344, 0.021), (1, -0.474, 2.714, 4.195), (1, 3.455, 0.014, 3.950), (1, 1.016, 1.828, 4.516), (1, 1.845, 0.193, 1.269), (1, -0.529, 3.930, 7.731), (1, 2.636, 0.045, 2.408), (1, 3.757, -0.918, 1.760), (1, -0.808, 1.160, -1.137), (1, 0.744, 1.435, 2.793), (1, 3.457, 3.566, 14.613), (1, 1.061, 3.140, 8.544), (1, 3.733, 3.368, 14.570), (1, -0.969, 0.879, -2.301), (1, 3.940, 3.136, 14.287), (1, -0.730, 2.107, 1.860), (1, 3.699, 2.820, 12.858), (1, 2.197, -0.636, -0.514), (1, 0.775, -0.979, -4.387), (1, 2.019, 2.828, 9.521), (1, 1.415, 0.113, 0.170), (1, 1.567, 3.410, 10.363), (1, 0.984, -0.960, -3.913), (1, 1.809, 2.487, 8.079), (1, 1.550, 1.130, 3.489), (1, -0.770, 3.027, 4.542), (1, -0.358, 3.326, 6.262), (1, 3.140, 0.096, 3.567), (1, -0.685, 2.213, 2.270), (1, 0.916, 0.692, 0.907), (1, 1.526, 1.159, 3.527), (1, 2.675, -0.568, 0.645), (1, 1.740, 3.019, 9.538), (1, 1.223, 2.088, 5.709), (1, 1.572, -0.125, -0.230), (1, 3.641, 0.362, 5.369), (1, 2.944, 3.897, 14.578), (1, 2.775, 2.461, 9.932), (1, -0.200, 2.492, 4.076), (1, 0.065, 2.055, 3.296), (1, 2.375, -0.639, -0.167), (1, -0.133, 1.138, 0.149), (1, -0.385, 0.163, -3.281), (1, 2.200, 0.863, 3.989), (1, -0.470, 3.492, 6.536), (1, -0.916, -0.547, -6.472), (1, 0.634, 0.927, 1.049), (1, 2.930, 2.655, 10.825), (1, 3.094, 2.802, 11.596), (1, 0.457, 0.539, -0.470), (1, 1.277, 2.229, 6.240), (1, -0.157, 1.270, 0.496), (1, 3.320, 0.640, 5.559), (1, 2.836, 1.067, 5.872), (1, 0.921, -0.716, -3.307), (1, 3.886, 1.487, 9.233), (1, 0.306, -0.142, -2.815), (1, 3.727, -0.410, 3.225), (1, 1.268, -0.801, -2.866), (1, 2.302, 2.493, 9.084), (1, 0.331, 0.373, -1.220), (1, 3.224, -0.857, 0.879), (1, 1.328, 2.786, 8.014), (1, 3.639, 1.601, 9.081), (1, 3.201, -0.484, 1.949), (1, 3.447, -0.734, 1.692), (1, 2.773, -0.143, 2.117), (1, 1.517, -0.493, -1.445), (1, 1.778, -0.428, -0.728), (1, 3.989, 0.099, 5.274), (1, 1.126, 3.985, 11.206), (1, 
0.348, 0.756, -0.035), (1, 2.399, 2.576, 9.525), (1, 0.866, 1.800, 4.132), (1, 3.612, 1.598, 9.017), (1, 0.495, 2.239, 4.707), (1, 2.442, 3.712, 13.019), (1, 0.238, -0.844, -5.057), (1, 1.404, 3.095, 9.093), (1, 2.842, 2.044, 8.816), (1, 0.622, 0.322, -0.791), (1, -0.561, 1.242, -0.395), (1, 0.679, 3.822, 9.823), (1, 1.875, 3.526, 11.327), (1, 3.587, 1.050, 7.324), (1, 1.467, 0.588, 1.699), (1, 3.180, 1.571, 8.074), (1, 1.402, 0.430, 1.093), (1, 1.834, 2.209, 7.294), (1, 3.542, -0.259, 3.306), (1, -0.517, 0.174, -3.513), (1, 3.549, 2.210, 10.729), (1, 2.260, 3.393, 11.699), (1, 0.036, 1.893, 2.751), (1, 0.680, 2.815, 6.804), (1, 0.219, 0.368, -1.459), (1, -0.519, 3.987, 7.924), (1, 0.974, 0.761, 1.231), (1, 0.107, 0.620, -0.927), (1, 1.513, 1.910, 5.755), (1, 3.114, 0.894, 5.910), (1, 3.061, 3.052, 12.276), (1, 2.556, 3.779, 13.448), (1, 1.964, 2.692, 9.002), (1, 3.894, -0.032, 4.690), (1, -0.693, 0.910, -1.655), (1, 2.692, 2.908, 11.108), (1, -0.824, 1.190, -1.078), (1, 3.621, 0.918, 6.997), (1, 3.190, 2.442, 10.707), (1, 1.424, -0.546, -1.791), (1, 2.061, -0.427, -0.158), (1, 1.532, 3.158, 9.540), (1, 0.648, 3.557, 8.967), (1, 2.511, 1.665, 7.017), (1, 1.903, -0.168, 0.302), (1, -0.186, -0.718, -5.528), (1, 2.421, 3.896, 13.531), (1, 3.063, 1.841, 8.650), (1, 0.636, 1.699, 3.367), (1, 1.555, 0.688, 2.174), (1, -0.412, 0.454, -2.462), (1, 1.645, 3.207, 9.911), (1, 3.396, 3.766, 15.090), (1, 0.375, -0.256, -3.017), (1, 3.636, 0.732, 6.469), (1, 2.503, 3.133, 11.405), (1, -0.253, 0.693, -1.429), (1, 3.178, 3.110, 12.686), (1, 3.282, -0.725, 1.388), (1, -0.297, 1.222, 0.070), (1, 1.872, 3.211, 10.377), (1, 3.471, 1.446, 8.278), (1, 2.891, 0.197, 3.374), (1, -0.896, 2.198, 1.802), (1, 1.178, -0.717, -2.796), (1, 0.650, 3.371, 8.412), (1, 0.447, 3.248, 7.637), (1, 1.616, -0.109, -0.097), (1, 1.837, 1.092, 3.951), (1, 0.767, 1.384, 2.684), (1, 3.466, -0.600, 2.133), (1, -0.800, -0.734, -6.802), (1, -0.534, 0.068, -3.865), (1, 3.416, -0.459, 2.455), (1, 0.800, -0.132, -1.795), (1, 2.150, 1.190, 4.869), (1, 0.830, 1.220, 2.319), (1, 2.656, 2.587, 10.072), (1, 0.375, -0.219, -2.906), (1, 0.582, -0.637, -3.749), (1, 0.588, -0.723, -3.992), (1, 3.875, 2.126, 11.127), (1, -0.476, 1.909, 1.775), (1, 0.963, 3.597, 9.716), (1, -0.888, 3.933, 7.021), (1, 1.711, -0.868, -2.184), (1, 3.244, 1.990, 9.460), (1, -0.057, 1.537, 1.497), (1, -0.015, 3.511, 7.504), (1, 0.280, 0.582, -0.695), (1, 2.402, 2.731, 9.998), (1, 2.053, 2.253, 7.865), (1, 1.955, 0.172, 1.424), (1, 3.746, 0.872, 7.107), (1, -0.157, 2.381, 3.829), (1, 3.548, -0.918, 1.340), (1, 2.449, 3.195, 11.482), (1, 1.582, 1.055, 3.329), (1, 1.908, -0.839, -1.700), (1, 2.341, 3.137, 11.091), (1, -0.043, 3.873, 8.532), (1, 0.528, -0.752, -4.198), (1, -0.940, 0.261, -4.098), (1, 2.609, 3.531, 12.812), (1, 2.439, 2.486, 9.336), (1, -0.659, -0.150, -4.768), (1, 2.131, 1.973, 7.181), (1, 0.253, 0.304, -1.583), (1, -0.169, 2.273, 3.480), (1, 1.855, 3.974, 12.631), (1, 0.092, 1.160, 0.666), (1, 3.990, 0.402, 6.187), (1, -0.455, 0.932, -1.113), (1, 2.365, 1.152, 5.185), (1, -0.058, 1.244, 0.618), (1, 0.674, 0.481, -0.209), (1, 3.002, 0.246, 3.743), (1, 1.804, 3.765, 11.902), (1, 3.567, -0.752, 1.876), (1, 0.098, 2.257, 3.968), (1, 0.130, -0.889, -5.409), (1, 0.633, 1.891, 3.940), (1, 0.421, 2.533, 5.440), (1, 2.252, 1.853, 7.063), (1, 3.191, -0.980, 0.443), (1, -0.776, 3.241, 5.171), (1, 0.509, 1.737, 3.229), (1, 3.583, 1.274, 7.986), (1, 1.101, 2.896, 7.891), (1, 3.072, -0.008, 3.120), (1, 2.945, -0.295, 2.006), (1, 3.621, -0.161, 3.760), (1, 1.399, 3.759, 
11.075), (1, 3.783, -0.866, 1.968), (1, -0.241, 2.902, 5.225), (1, 1.323, 1.934, 5.449), (1, 1.449, 2.855, 8.464), (1, 0.088, 1.526, 1.753), (1, -1.000, 2.161, 1.485), (1, -0.214, 3.358, 6.647), (1, -0.384, 3.230, 5.921), (1, 3.146, 1.228, 6.975), (1, 1.917, 0.860, 3.415), (1, 1.982, 1.735, 6.167), (1, 1.404, 1.851, 5.360), (1, 2.428, -0.674, -0.166), (1, 2.081, -0.505, -0.352), (1, 0.914, -0.543, -2.802), (1, -0.029, -0.482, -4.506), (1, 0.671, 0.184, -1.105), (1, 1.641, -0.524, -1.292), (1, 1.005, 0.361, 0.094), (1, -0.493, 3.582, 6.760), (2, 3.876, 2.563, 21.500), (2, 0.159, -0.309, 7.986), (2, -0.496, 0.417, 12.998), (2, -0.164, -0.512, 7.092), (2, 0.632, 3.200, 28.571), (2, 3.772, 0.493, 9.188), (2, 2.430, -0.797, 2.789), (2, 3.872, -0.775, 1.475), (2, -0.031, -0.256, 8.495), (2, 2.726, 3.000, 25.271), (2, 1.116, -0.269, 7.269), (2, 0.551, 3.402, 29.860), (2, 0.820, 2.500, 24.179), (2, 1.153, -0.453, 6.131), (2, -0.717, -0.360, 8.556), (2, 0.532, 0.531, 12.654), (2, 2.096, 0.981, 13.791), (2, 0.146, -0.433, 7.259), (2, 1.000, 1.075, 15.452), (2, 2.963, -0.090, 6.495), (2, 1.047, 2.052, 21.267), (2, 0.882, 1.778, 19.785), (2, 1.380, 2.702, 24.832), (2, 1.853, 0.401, 10.554), (2, 2.004, 1.770, 18.618), (2, 3.377, 0.772, 11.253), (2, 1.227, -0.169, 7.759), (2, 0.428, 2.052, 21.885), (2, 0.070, 3.648, 31.816), (2, 0.128, -0.938, 4.244), (2, 2.061, 0.753, 12.454), (2, 1.207, -0.301, 6.989), (2, -0.168, 3.765, 32.757), (2, 3.450, 1.801, 17.353), (2, -0.483, 3.344, 30.547), (2, 1.847, 1.884, 19.455), (2, 3.241, 2.369, 20.975), (2, 0.628, 3.590, 30.912), (2, 2.183, 1.741, 18.263), (2, 0.774, 2.638, 25.057), (2, 3.292, 2.867, 23.912), (2, 0.056, 2.651, 25.850), (2, -0.506, 0.300, 12.308), (2, 0.524, 1.182, 16.570), (2, -0.267, 2.563, 25.647), (2, 3.953, -0.334, 4.040), (2, 2.507, 2.319, 21.408), (2, -0.770, 1.017, 16.875), (2, 0.481, 1.591, 19.062), (2, 3.243, 1.060, 13.114), (2, 2.178, -0.325, 5.873), (2, 2.510, 1.235, 14.900), (2, 2.684, 2.370, 21.535), (2, 3.466, 3.656, 28.469), (2, 2.994, 3.960, 30.764), (2, -0.363, 3.592, 31.917), (2, 1.738, 0.074, 8.708), (2, 1.462, 3.727, 30.902), (2, 0.059, 0.180, 11.021), (2, 2.980, 2.317, 20.925), (2, 1.248, 0.965, 14.545), (2, 0.776, -0.229, 7.850), (2, -0.562, 2.839, 27.598), (2, 3.581, 0.244, 7.883), (2, -0.958, 0.901, 16.362), (2, 3.257, 0.364, 8.925), (2, 1.478, 1.718, 18.827), (2, -0.121, -0.436, 7.507), (2, 0.966, 1.444, 17.697), (2, 3.631, 3.463, 27.144), (2, 0.174, -0.663, 5.848), (2, 2.783, 0.124, 7.959), (2, 1.106, -0.936, 3.276), (2, 0.186, -0.942, 4.162), (2, 3.513, 2.456, 21.222), (2, 0.339, 2.316, 23.558), (2, 0.566, 2.515, 24.523), (2, -0.134, 0.746, 14.607), (2, 1.554, 0.106, 9.084), (2, -0.846, 2.748, 27.337), (2, 3.934, 0.564, 9.451), (2, 2.840, -0.966, 1.366), (2, 1.379, 0.307, 10.463), (2, 1.065, -0.780, 4.253), (2, 3.324, 2.145, 19.546), (2, 0.974, -0.543, 5.767), (2, 2.469, 3.976, 31.385), (2, -0.434, 3.689, 32.570), (2, 0.261, 0.481, 12.624), (2, 3.786, 2.605, 21.843), (2, -0.460, -0.536, 7.243), (2, 2.576, 2.880, 24.702), (2, -0.501, 3.551, 31.810), (2, 2.946, 3.263, 26.633), (2, 2.959, -0.813, 2.162), (2, -0.749, 0.490, 13.686), (2, 2.821, 0.335, 9.187), (2, 3.964, 0.272, 7.667), (2, 0.808, -0.700, 4.994), (2, 0.415, 2.183, 22.682), (2, 2.551, 3.785, 30.156), (2, 0.821, 1.120, 15.897), (2, 1.714, 3.019, 26.400), (2, 2.265, 1.950, 19.438), (2, 1.493, 3.317, 28.409), (2, -0.445, 2.282, 24.134), (2, -0.508, 2.508, 25.553), (2, 1.017, -0.621, 5.255), (2, 1.053, 2.246, 22.422), (2, 0.441, 1.637, 19.382), (2, 3.657, 1.246, 
13.816), (2, 0.756, 0.808, 14.095), (2, 1.849, 1.599, 17.742), (2, 1.782, -0.000, 8.215), (2, 1.136, 3.940, 32.506), (2, 2.814, 3.288, 26.916), (2, 3.180, 3.198, 26.008), (2, 0.728, -0.054, 8.946), (2, 0.801, 0.775, 13.852), (2, 1.399, -0.546, 5.322), (2, 1.415, 1.753, 19.103), (2, 2.860, 1.796, 17.913), (2, 0.712, 2.902, 26.699), (2, -0.389, 3.093, 28.945), (2, 3.661, 3.666, 28.333), (2, 3.944, 0.996, 12.030), (2, 1.655, 1.385, 16.657), (2, 0.122, -0.662, 5.906), (2, 3.667, 2.763, 22.912), (2, 2.606, 0.630, 11.172), (2, -0.291, 1.492, 19.242), (2, -0.787, 1.223, 18.125), (2, 2.405, 0.325, 9.545), (2, 3.129, -0.412, 4.398), (2, 0.588, 3.964, 33.194), (2, -0.177, 3.636, 31.993), (2, 2.079, 3.280, 27.603), (2, 3.055, 3.958, 30.692), (2, -0.164, 3.188, 29.292), (2, 3.803, 3.151, 25.105), (2, 3.123, -0.891, 1.531), (2, 3.070, -0.824, 1.988), (2, 3.103, -0.931, 1.309), (2, 0.589, 3.353, 29.529), (2, 1.095, 1.973, 20.744), (2, -0.557, 0.370, 12.775), (2, 1.223, 0.307, 10.620), (2, 3.255, -0.768, 2.136), (2, 0.508, 2.157, 22.435), (2, 0.373, 0.319, 11.544), (2, 1.240, 1.736, 19.177), (2, 1.846, 0.970, 13.972), (2, 3.352, -0.534, 3.445), (2, -0.352, -0.290, 8.610), (2, 0.281, 0.193, 10.880), (2, 3.450, -0.059, 6.193), (2, 0.310, 2.575, 25.140), (2, 1.791, 1.127, 14.970), (2, 1.992, 2.347, 22.087), (2, -0.288, 2.881, 27.576), (2, 3.464, 3.664, 28.518), (2, 0.573, 2.789, 26.159), (2, 2.265, 1.583, 17.233), (2, 3.203, 0.730, 11.177), (2, 3.345, 1.368, 14.862), (2, 0.891, 3.690, 31.248), (2, 2.252, -0.311, 5.884), (2, -0.087, 0.804, 14.912), (2, 0.153, 2.510, 24.905), (2, 3.533, -0.965, 0.675), (2, 2.035, 1.953, 19.683), (2, 0.316, 2.448, 24.373), (2, 2.199, 3.858, 30.946), (2, -0.519, 3.647, 32.399), (2, 0.867, 1.961, 20.901), (2, 2.739, 2.268, 20.866), (2, 2.462, -0.664, 3.551), (2, 1.372, 3.419, 29.144), (2, -0.628, 2.723, 26.968), (2, 3.989, -0.225, 4.659), (2, 0.166, 3.190, 28.976), (2, 1.681, 2.937, 25.943), (2, 2.979, 2.263, 20.600), (2, 3.896, -0.419, 3.590), (2, 3.861, 2.224, 19.485), (2, -0.087, -0.861, 4.918), (2, 1.182, 1.886, 20.133), (2, 3.622, 2.320, 20.301), (2, 3.560, 0.008, 6.491), (2, 3.082, -0.605, 3.285), (2, 1.777, 1.324, 16.169), (2, 2.269, 2.436, 22.348), (2, 0.019, 3.074, 28.423), (2, -0.560, 3.868, 33.765), (2, 1.568, 2.886, 25.749), (2, 2.045, 0.222, 9.286), (2, 1.391, 0.352, 10.723), (2, 0.172, 1.908, 21.276), (2, 1.173, -0.726, 4.474), (2, 1.642, 2.576, 23.814), (2, 3.346, 1.377, 14.918), (2, 0.120, 0.411, 12.344), (2, 3.913, 0.820, 11.008), (2, 1.054, 3.732, 31.340), (2, 2.284, 0.108, 8.362), (2, 2.266, 0.066, 8.131), (2, 3.204, 1.156, 13.735), (2, 3.243, 2.032, 18.947), (2, 3.052, -0.121, 6.221), (2, 1.131, 2.189, 22.000), (2, 2.958, 0.658, 10.990), (2, 1.717, 3.708, 30.530), (2, 2.417, 2.070, 20.004), (2, 2.175, 0.881, 13.110), (2, 0.333, 3.494, 30.629), (2, 3.598, 3.940, 30.044), (2, 3.683, -0.110, 5.660), (2, 2.555, 1.196, 14.620), (2, 1.511, 0.453, 11.206), (2, 0.903, 1.390, 17.439), (2, -0.897, 3.303, 30.716), (2, 0.245, 2.129, 22.527), (2, 1.370, 2.715, 24.923), (2, 1.822, -0.917, 2.676), (2, 2.690, -0.109, 6.657), (2, 0.206, 1.561, 19.162), (2, 3.905, 2.710, 22.357), (2, -0.438, 3.207, 29.678), (2, 0.898, 3.445, 29.772), (2, 1.838, 2.871, 25.385), (2, 0.116, 1.401, 18.292), (2, -0.408, 2.375, 24.656), (2, 1.681, 3.338, 28.349), (2, 1.177, -0.318, 6.914), (2, 1.004, 0.626, 12.753), (2, 2.840, 2.589, 22.691), (2, 1.258, 3.993, 32.700), (2, 2.016, 3.489, 28.920), (2, -0.728, 0.164, 11.713), (2, 0.193, 1.479, 18.682), (2, 2.647, -0.969, 1.541), (2, 3.837, 2.602, 
21.773), (2, 0.541, 0.205, 10.690), (2, 0.026, 2.756, 26.511), (2, 0.924, 0.909, 14.530), (2, 0.974, -0.074, 8.581), (2, 0.081, 0.005, 9.948), (2, 1.331, 2.942, 26.320), (2, 2.498, 3.405, 27.934), (2, 3.741, 1.554, 15.581), (2, 3.502, -0.089, 5.964), (2, 3.069, 1.768, 17.539), (2, 3.115, -0.008, 6.839), (2, 3.237, -0.503, 3.745), (2, 0.768, -0.135, 8.420), (2, 0.410, 3.974, 33.437), (2, 0.238, -0.700, 5.564), (2, 3.619, 0.350, 8.482), (2, 3.563, 3.059, 24.788), (2, 2.916, 3.101, 25.691), (2, 0.144, 3.282, 29.549), (2, 1.288, 2.642, 24.565), (2, -0.859, 0.229, 12.234), (2, 1.507, -0.711, 4.229), (2, -0.634, 2.608, 26.281), (2, 2.054, -0.834, 2.942), (2, 0.453, 1.072, 15.980), (2, 3.914, 1.159, 13.039), (2, 0.254, 1.835, 20.758), (2, 1.577, 0.428, 10.991), (2, 1.990, 3.569, 29.421), (2, 1.584, 1.803, 19.234), (2, 0.835, 3.603, 30.785), (2, 0.900, 3.033, 27.296), (2, 1.180, 0.280, 10.499), (2, 2.400, 2.802, 24.409), (2, 0.924, 2.462, 23.851), (2, 2.138, 0.722, 12.192), (2, -0.253, -0.809, 5.401), (2, 3.570, -0.116, 5.733), (2, 0.201, -0.182, 8.708), (2, 2.457, 0.454, 10.267), (2, -0.053, 0.443, 12.709), (2, 2.108, 2.069, 20.309), (2, -0.964, -0.441, 8.318), (2, 1.802, 0.403, 10.614), (2, 3.704, 3.902, 29.711), (2, 1.904, 2.418, 22.603), (2, 2.965, 3.429, 27.606), (2, -0.801, -0.072, 10.370), (2, 3.009, 0.491, 9.937), (2, 2.781, 1.026, 13.376), (2, -0.421, 0.744, 14.883), (2, 3.639, -0.148, 5.476), (2, 0.584, 2.041, 21.663), (2, 1.547, -0.391, 6.107), (2, -0.204, 0.727, 14.564), (2, 0.372, 0.464, 12.410), (2, 1.185, 1.732, 19.207), (2, 3.574, 0.755, 10.954), (2, 2.164, 1.425, 16.385), (2, 1.895, 1.374, 16.351), (2, 2.352, 2.188, 20.779), (2, 0.187, 0.677, 13.874), (2, -0.589, 3.686, 32.703), (2, 3.081, 0.414, 9.403), (2, 3.341, 3.246, 26.137), (2, 0.617, -0.201, 8.174), (2, 1.518, 3.833, 31.481), (2, 2.613, -0.350, 5.286), (2, 3.426, 0.751, 11.082), (2, 2.726, 3.586, 28.787), (2, 2.834, -0.219, 5.855), (2, 1.038, 3.607, 30.605), (2, 0.479, 1.226, 16.874), (2, 1.729, 0.297, 10.053), (2, 0.050, 1.815, 20.841), (2, -0.554, 3.538, 31.782), (2, 2.773, 0.973, 13.064), (2, -0.239, 3.425, 30.786), (2, 3.611, 3.700, 28.590), (2, 1.418, 3.625, 30.332), (2, 1.599, 1.626, 18.156), (2, 1.841, 1.518, 17.269), (2, 1.119, 1.996, 20.856), (2, 2.810, 2.293, 20.947), (2, 1.174, 2.062, 21.198), (2, -0.326, -0.279, 8.655), (2, -0.365, 0.816, 15.259), (2, 1.296, -0.095, 8.132), (2, -0.263, 0.511, 13.327), (2, 1.757, 3.012, 26.314), (2, 1.849, 1.065, 14.539), (2, 1.651, 2.244, 21.814), (2, 3.942, 1.026, 12.214), (2, 2.314, 1.944, 19.353), (2, 3.055, -0.002, 6.930), (2, 0.402, 1.350, 17.698), (2, 0.004, 2.288, 23.724), (2, 3.265, 2.962, 24.509), (2, 1.044, -0.684, 4.850), (2, -0.280, 2.278, 23.948), (2, 1.216, 0.726, 13.142), (2, 3.181, 3.518, 27.925), (2, 3.199, -0.124, 6.055), (2, 0.510, -0.622, 5.755), (2, 2.920, 1.067, 13.484), (2, 2.573, 1.844, 18.492), (2, 1.155, 3.505, 29.878), (2, 2.033, 1.756, 18.502), (2, 1.312, 0.114, 9.373), (2, -0.823, 3.339, 30.854), (2, 0.287, 3.891, 33.060), (2, -0.621, -0.210, 9.363), (2, 3.734, 1.574, 15.712), (2, -0.932, 0.772, 15.561), (2, -0.719, 1.604, 20.345), (2, -0.555, 0.773, 15.190), (2, -0.744, 3.934, 34.348), (2, 1.671, -0.425, 5.778), (2, 2.754, 2.690, 23.385), (2, 1.826, 2.185, 21.283), (2, 1.970, 0.021, 8.159), (2, 2.882, 3.494, 28.081), (2, 1.668, -0.030, 8.150), (2, 0.472, 2.184, 22.633), (2, 1.656, 3.393, 28.701), (2, -0.069, 2.331, 24.057), (2, 0.075, 1.341, 17.973), (2, 1.836, 0.565, 11.554), (2, -0.235, 0.520, 13.357), (2, 3.620, 3.169, 25.393), (2, 0.401, 
-0.062, 9.224), (2, 1.503, 1.667, 18.501), (2, 3.727, 1.149, 13.166), (2, 2.777, -0.081, 6.737), (2, 3.914, -0.234, 4.680), (2, 1.765, 0.750, 12.737), (2, 1.746, 1.818, 19.161), (2, 0.019, 2.819, 26.893), (2, 1.068, 1.917, 20.434), (2, 3.035, 3.158, 25.915), (2, 2.012, 0.724, 12.330), (2, 2.597, 2.264, 20.986), (2, 3.428, 3.239, 26.005), (2, -0.016, -0.529, 6.842), (2, 1.314, 0.735, 13.095), (2, 2.832, -0.567, 3.768), (2, -0.296, 2.641, 26.141), (2, 2.863, 3.889, 30.470), (2, 2.849, 3.997, 31.130), (2, 1.660, 1.813, 19.216), (2, 2.798, 0.977, 13.062), (2, 3.935, 0.549, 9.359), (2, 1.002, 3.557, 30.342), (2, 3.052, 2.207, 20.193), (2, 3.455, 0.458, 9.294), (2, 3.312, 2.138, 19.515), (2, 0.292, 0.058, 10.056), (2, 0.050, -0.211, 8.682), (2, -0.215, 1.108, 16.866), (2, -0.169, 0.647, 14.048), (2, 2.546, 0.876, 12.709), (2, -0.911, -0.209, 9.659), (2, 0.950, 2.894, 26.413), (2, -0.512, -0.167, 9.508), (2, 1.821, -0.747, 3.696), (2, 2.257, 3.945, 31.415), (2, 2.398, -0.586, 4.087), (2, 3.051, 0.815, 11.836), (2, 3.399, 2.131, 19.389), (2, 2.982, 1.549, 16.314), (2, -0.790, -0.329, 8.819), (2, 3.797, 0.327, 8.167), (2, 1.838, 0.290, 9.902), (2, 1.906, 1.782, 18.785), (2, 1.330, -0.208, 7.422), (2, -0.217, 0.854, 15.344), (2, 3.310, 1.582, 16.180), (2, 2.965, 0.917, 12.537), (2, 3.558, -0.164, 5.460), (2, -0.841, 2.060, 23.203), (2, 2.892, 2.621, 22.834), (2, -0.011, -0.198, 8.821), (2, -0.430, 2.999, 28.424), (2, -0.584, 0.894, 15.946), (2, 0.033, 1.310, 17.829), (2, 3.044, 0.410, 9.418), (2, 3.932, 0.295, 7.836), (2, 0.394, 1.315, 17.494), (2, 1.424, -0.167, 7.573), (2, 1.676, 1.118, 15.031), (2, 1.821, 0.714, 12.462), (2, 2.688, 1.497, 16.292), (2, 3.960, 2.344, 20.103), (2, -0.787, -0.161, 9.819), (2, 3.538, 3.651, 28.366), (2, -0.338, 0.458, 13.088), (2, -0.146, 3.162, 29.120), (2, 3.124, 3.352, 26.989), (2, -0.189, 3.685, 32.301), (2, 0.396, 1.004, 15.626), (2, -0.171, 2.114, 22.858), (2, 3.736, 0.732, 10.659), (2, 1.259, 2.564, 24.127), (2, -0.263, 2.426, 24.820), (2, 1.558, -0.858, 3.292), (2, 2.882, 1.110, 13.776), (2, 0.039, 1.284, 17.666), (2, 3.074, 2.379, 21.201), (2, -0.523, 0.303, 12.344), (2, 0.363, 1.082, 16.132), (2, 2.925, 2.187, 20.195), (2, 0.595, -0.335, 7.397), (2, 0.062, -0.232, 8.544), (2, 0.877, 2.155, 22.050), (2, -0.256, 2.922, 27.788), (2, 1.813, 3.161, 27.152), (2, 2.177, 2.532, 23.016), (2, -0.051, 0.035, 10.263), (2, 2.688, 3.599, 28.906), (2, 2.539, -0.076, 7.008), (2, 2.563, 1.467, 16.240), (2, -0.755, 2.276, 24.410), (2, 3.092, 0.660, 10.868), (2, 2.403, 2.693, 23.756), (2, -0.170, 2.178, 23.239), (2, 2.672, -0.603, 3.712), (2, -0.077, -0.493, 7.116), (2, 1.997, 1.934, 19.608), (2, 1.913, -0.792, 3.335), (2, 0.171, -0.329, 7.857), (2, 2.488, 0.171, 8.540), (2, -0.514, 0.331, 12.500), (2, -0.201, 2.484, 25.103), (2, 2.436, 0.032, 7.759), (2, -0.094, 2.530, 25.275), (2, 2.186, 2.591, 23.358), (2, 3.171, -0.766, 2.231), (2, 2.410, 0.183, 8.687), (2, -0.699, -0.329, 8.728), (2, 3.285, 2.252, 20.228), (2, 1.928, -0.059, 7.720), (2, 3.460, 0.399, 8.931), (2, 2.542, 0.224, 8.801), (2, 2.902, 2.101, 19.702), (2, 3.808, 2.528, 21.358), (2, 0.330, 0.642, 13.522), (2, -0.088, 1.286, 17.804), (2, 3.025, 2.354, 21.100), (2, 3.306, 2.049, 18.986), (2, 1.477, 1.720, 18.845), (2, 2.676, 3.601, 28.931), (2, 1.577, 0.170, 9.443), (2, 1.362, 3.534, 29.843), (2, 2.616, 3.106, 26.018), (2, 3.773, 0.378, 8.496), (2, -0.125, 2.057, 22.465), (2, 3.174, 1.382, 15.120), (2, 0.844, 2.058, 21.503); SELECT ANS[1] > -1.1 AND ANS[1] < -0.9 AND ANS[2] > 5.9 AND ANS[2] < 6.1 AND ANS[3] > 9.9 
AND ANS[3] < 10.1 FROM -(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id LIMIT 0, 1); +(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id ORDER BY user_id LIMIT 1, 1); SELECT ANS[1] > 1.9 AND ANS[1] < 2.1 AND ANS[2] > 2.9 AND ANS[2] < 3.1 AND ANS[3] > -3.1 AND ANS[3] < -2.9 FROM -(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id LIMIT 1, 1); +(SELECT stochasticLinearRegression(0.05, 0, 1, 'SGD')(target, p1, p2) AS ANS FROM grouptest GROUP BY user_id ORDER BY user_id LIMIT 0, 1); DROP TABLE defaults; DROP TABLE model; From 5251548753d144d3454f617cfb632efad6d5fd09 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 8 Dec 2023 17:28:26 +0000 Subject: [PATCH 031/137] Fix tests and build --- .../WriteBufferFromHTTPServerResponse.cpp | 2 +- src/Server/PrometheusRequestHandler.cpp | 8 +- .../__init__.py | 0 .../configs/remote_servers.xml | 40 ----- .../test.py | 157 ------------------ 5 files changed, 7 insertions(+), 200 deletions(-) delete mode 100644 tests/integration/test_max_http_connections_for_replication/__init__.py delete mode 100644 tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml delete mode 100644 tests/integration/test_max_http_connections_for_replication/test.py diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index ac722656899..1a12c09a8c7 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -136,7 +136,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - size_t keep_alive_timeout_, + UInt64 keep_alive_timeout_, bool compress_, CompressionMethod compression_method_) : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index b871d1b0ddc..127ed843cb6 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -18,7 +17,12 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); + /// Raw config reference is used here to avoid dependency on Context and ServerSettings. + /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 + /// And there ordinary Context is replaced with a tiny clone. 
+ const auto & config = server.config(); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + setResponseDefaultHeaders(response, keep_alive_timeout); response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); diff --git a/tests/integration/test_max_http_connections_for_replication/__init__.py b/tests/integration/test_max_http_connections_for_replication/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml b/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml deleted file mode 100644 index e62425fe1bb..00000000000 --- a/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - - true - - test - node1 - 9000 - - - test - node2 - 9000 - - - - - - true - - test - node3 - 9000 - - - test - node4 - 9000 - - - test - node5 - 9000 - - - - - - diff --git a/tests/integration/test_max_http_connections_for_replication/test.py b/tests/integration/test_max_http_connections_for_replication/test.py deleted file mode 100644 index bcb779ee913..00000000000 --- a/tests/integration/test_max_http_connections_for_replication/test.py +++ /dev/null @@ -1,157 +0,0 @@ -import time -from multiprocessing.dummy import Pool - -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - - -def _fill_nodes(nodes, shard, connections_count): - for node in nodes: - node.query( - """ - CREATE DATABASE test; - - CREATE TABLE test_table(date Date, id UInt32, dummy UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') - PARTITION BY date - ORDER BY id - SETTINGS - replicated_max_parallel_fetches_for_host={connections}, - index_granularity=8192; - """.format( - shard=shard, replica=node.name, connections=connections_count - ) - ) - - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node2 = cluster.add_instance( - "node2", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) - - -@pytest.fixture(scope="module") -def start_small_cluster(): - try: - cluster.start() - - _fill_nodes([node1, node2], 1, 1) - - yield cluster - - finally: - cluster.shutdown() - - -def test_single_endpoint_connections_count(start_small_cluster): - node1.query("TRUNCATE TABLE test_table") - node2.query("SYSTEM SYNC REPLICA test_table") - - def task(count): - print(("Inserting ten times from {}".format(count))) - for i in range(count, count + 10): - node1.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) - - p = Pool(10) - p.map(task, range(0, 100, 10)) - - assert_eq_with_retry(node1, "select count() from test_table", "100") - assert_eq_with_retry(node2, "select count() from test_table", "100") - - assert ( - node2.query( - "SELECT value FROM system.events where event='CreatedHTTPConnections'" - ) - == "1\n" - ) - - -def test_keepalive_timeout(start_small_cluster): - node1.query("TRUNCATE TABLE test_table") - node2.query("SYSTEM SYNC REPLICA test_table") - - node1.query("insert into test_table values ('2017-06-16', 777, 0)") - assert_eq_with_retry(node2, "select count() from test_table", str(1)) - # Server keepAliveTimeout is 3 seconds, default client session timeout is 8 - # lets sleep in that interval 
- time.sleep(4) - - node1.query("insert into test_table values ('2017-06-16', 888, 0)") - - time.sleep(3) - - assert_eq_with_retry(node2, "select count() from test_table", str(2)) - - assert not node2.contains_in_log( - "No message received" - ), "Found 'No message received' in clickhouse-server.log" - - -node3 = cluster.add_instance( - "node3", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node4 = cluster.add_instance( - "node4", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) -node5 = cluster.add_instance( - "node5", - user_configs=[], - main_configs=["configs/remote_servers.xml"], - with_zookeeper=True, -) - - -@pytest.fixture(scope="module") -def start_big_cluster(): - try: - cluster.start() - - _fill_nodes([node3, node4, node5], 2, 2) - - yield cluster - - finally: - cluster.shutdown() - - -def test_multiple_endpoint_connections_count(start_big_cluster): - def task(count): - print(("Inserting ten times from {}".format(count))) - if (count / 10) % 2 == 1: - node = node3 - else: - node = node4 - - for i in range(count, count + 10): - node.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) - - p = Pool(10) - p.map(task, range(0, 100, 10)) - - assert_eq_with_retry(node3, "select count() from test_table", "100") - assert_eq_with_retry(node4, "select count() from test_table", "100") - assert_eq_with_retry(node5, "select count() from test_table", "100") - - # Two per each host or sometimes less, if fetches are not performed in parallel. But not more. - assert ( - node5.query( - "SELECT value FROM system.events where event='CreatedHTTPConnections'" - ) - <= "4\n" - ) From 951f971d73b73be590c9ce25e7d52db41737d4c0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 8 Dec 2023 19:20:48 +0000 Subject: [PATCH 032/137] fix some tests --- .../00166_functions_of_aggregation_states.sql | 3 +++ .../01012_reset_running_accumulate.sql | 3 +++ ...emental_streaming_from_2_src_with_feedback.sql | 3 +++ .../01088_array_slice_of_aggregate_functions.sql | 2 +- .../0_stateless/01472_many_rows_in_totals.sql | 3 +++ .../0_stateless/01799_long_uniq_theta_sketch.sql | 3 +++ .../01913_exact_rows_before_limit_full.reference | 2 +- .../01913_exact_rows_before_limit_full.sql | 10 +++++----- tests/queries/0_stateless/02366_kql_extend.sql | 7 +++++-- .../queries/0_stateless/02366_kql_makeseries.sql | 15 +++++++++------ 10 files changed, 36 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql b/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql index b73a04e19b9..0a5a84bbb46 100644 --- a/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql +++ b/tests/queries/0_stateless/00166_functions_of_aggregation_states.sql @@ -1 +1,4 @@ +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. 
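-- Illustration only (not part of the original test; the query below is invented for this note):
-- runningAccumulate merges the passed states row by row, but only within one result block, e.g.
--   SELECT k, runningAccumulate(s)
--   FROM (SELECT number % 3 AS k, sumState(number) AS s FROM numbers(9) GROUP BY k ORDER BY k);
-- returns 9, 21, 36. If external aggregation splits the result into several blocks,
-- the accumulation restarts at each block boundary, hence the setting below.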
+SET max_bytes_before_external_group_by = 0; + SELECT k, finalizeAggregation(sum_state), runningAccumulate(sum_state) FROM (SELECT intDiv(number, 50000) AS k, sumState(number) AS sum_state FROM (SELECT number FROM system.numbers LIMIT 1000000) GROUP BY k ORDER BY k); diff --git a/tests/queries/0_stateless/01012_reset_running_accumulate.sql b/tests/queries/0_stateless/01012_reset_running_accumulate.sql index b9336b2f50c..c2c5bf6f87d 100644 --- a/tests/queries/0_stateless/01012_reset_running_accumulate.sql +++ b/tests/queries/0_stateless/01012_reset_running_accumulate.sql @@ -1,3 +1,6 @@ +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. +SET max_bytes_before_external_group_by = 0; + SELECT grouping, item, runningAccumulate(state, grouping) diff --git a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql index 3d75fb0ccc9..ae90dc3cc72 100644 --- a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql +++ b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql @@ -1,5 +1,8 @@ SET joined_subquery_requires_alias = 0; SET max_threads = 1; +-- It affects number of read rows and max_rows_to_read. +SET max_bytes_before_external_sort = 0; +SET max_bytes_before_external_group_by = 0; -- incremental streaming usecase -- that has sense only if data filling order has guarantees of chronological order diff --git a/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql b/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql index ba525f30228..c8466b57051 100644 --- a/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql +++ b/tests/queries/0_stateless/01088_array_slice_of_aggregate_functions.sql @@ -1 +1 @@ -select arraySlice(groupArray(x),1,1) as y from (select uniqState(number) as x from numbers(10) group by number); +select arraySlice(groupArray(x), 1, 1) as y from (select uniqState(number) as x from numbers(10) group by number order by number); diff --git a/tests/queries/0_stateless/01472_many_rows_in_totals.sql b/tests/queries/0_stateless/01472_many_rows_in_totals.sql index d79d189a28d..bea8c255f21 100644 --- a/tests/queries/0_stateless/01472_many_rows_in_totals.sql +++ b/tests/queries/0_stateless/01472_many_rows_in_totals.sql @@ -1,4 +1,7 @@ +-- Disable external aggregation because it may produce several blocks instead of one. +set max_bytes_before_external_group_by = 0; set output_format_write_statistics = 0; + select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format Pretty; select '--'; diff --git a/tests/queries/0_stateless/01799_long_uniq_theta_sketch.sql b/tests/queries/0_stateless/01799_long_uniq_theta_sketch.sql index 37f0c31ab10..475f156af93 100644 --- a/tests/queries/0_stateless/01799_long_uniq_theta_sketch.sql +++ b/tests/queries/0_stateless/01799_long_uniq_theta_sketch.sql @@ -1,5 +1,8 @@ -- Tags: long, no-fasttest +-- The result slightly differs but it's ok since `uniqueTheta` is an approximate function. 
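-- Illustration only (not part of the original test; the query below is invented for this note):
-- uniqTheta is a sketch-based estimator, so its result can vary slightly with the order in which
-- partial aggregation states are merged, e.g.
--   SELECT uniqExact(number) AS exact, uniqTheta(number) AS approx FROM numbers(1000000);
-- exact is 1000000, approx is only close to it, which is why the threshold is pinned below.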
+set group_by_two_level_threshold_bytes = 0; + SELECT 'uniqTheta'; SELECT Y, uniqTheta(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y ORDER BY Y; diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference index a0f4560ca1c..95ce4d6428d 100644 --- a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference @@ -45,7 +45,7 @@ "data": [ - [12] + [10] ], "rows": 1, diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql index 84f97090169..07e54fb2ec2 100644 --- a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql @@ -10,20 +10,20 @@ set exact_rows_before_limit = 1, output_format_write_statistics = 0, max_block_s select * from test limit 1 FORMAT JSONCompact; -select * from test where i < 10 group by i limit 1 FORMAT JSONCompact; +select * from test where i < 10 group by i order by i limit 1 FORMAT JSONCompact; -select * from test group by i having i in (10, 11, 12) limit 1 FORMAT JSONCompact; +select * from test group by i having i in (10, 11, 12) order by i limit 1 FORMAT JSONCompact; select * from test where i < 20 order by i limit 1 FORMAT JSONCompact; set prefer_localhost_replica = 0; -select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 order by i limit 1 FORMAT JSONCompact; select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; set prefer_localhost_replica = 1; -select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 order by i limit 1 FORMAT JSONCompact; select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; -select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) limit 1 FORMAT JSONCompact; +select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) order by i limit 1 FORMAT JSONCompact; drop table if exists test; diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql index 3de489b0815..0a3c1f3dcd4 100644 --- a/tests/queries/0_stateless/02366_kql_extend.sql +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -12,16 +12,19 @@ -- 'Costco','Snargaluff',200,'2016-09-10', -- ] + DROP TABLE IF EXISTS Ledger; CREATE TABLE Ledger -( +( Supplier Nullable(String), Fruit String , Price Float64, - Purchase Date + Purchase Date ) ENGINE = Memory; INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); +-- This test requies sorting after some of aggregations but I don't know KQL, sorry +set 
max_bytes_before_external_group_by = 0; set dialect = 'kusto'; print '-- extend #1 --'; diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql index ecf2ef43cc4..c9ca91c0be0 100644 --- a/tests/queries/0_stateless/02366_kql_makeseries.sql +++ b/tests/queries/0_stateless/02366_kql_makeseries.sql @@ -14,31 +14,34 @@ -- ]; DROP TABLE IF EXISTS make_series_test_table; CREATE TABLE make_series_test_table -( +( Supplier Nullable(String), Fruit String , Price Float64, - Purchase Date + Purchase Date ) ENGINE = Memory; INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); DROP TABLE IF EXISTS make_series_test_table2; CREATE TABLE make_series_test_table2 -( +( Supplier Nullable(String), Fruit String , Price Int32, - Purchase Int32 + Purchase Int32 ) ENGINE = Memory; INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); DROP TABLE IF EXISTS make_series_test_table3; CREATE TABLE make_series_test_table3 -( +( timestamp datetime, metric Float64, ) ENGINE = Memory; INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00', 'UTC'), 50), (parseDateTimeBestEffort('2017-01-01', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-02', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-03', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-03T03:00', 'UTC'), 6), (parseDateTimeBestEffort('2017-01-05', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-05T13:40', 'UTC'), 13), (parseDateTimeBestEffort('2017-01-06', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-07', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-08', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-08T21:00', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-09', 'UTC'), 2), (parseDateTimeBestEffort('2017-01-09T12:00', 'UTC'), 11), (parseDateTimeBestEffort('2017-01-10T05:00', 'UTC'), 5); +-- This test requies sorting after some of aggregations but I don't know KQL, sorry +set max_bytes_before_external_group_by = 0; set dialect = 'kusto'; + print '-- from to'; make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; print '-- from'; @@ -68,7 +71,7 @@ make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase print '-- without by'; make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; -make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d +make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d -- print '-- summarize --' -- make_series_test_table | summarize count() by format_datetime(bin(Purchase, 1d), 'yy-MM-dd'); From 7cd770e9a73c14ea078607b360f66506d639a269 Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Sat, 9 Dec 2023 20:09:41 +0800 Subject: [PATCH 
033/137] fix doc and comments --- docs/en/operations/system-tables/tables.md | 2 +- src/Storages/IStorage.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 231be6404a3..364692a934c 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -57,7 +57,7 @@ Columns: - If the table stores data on disk, returns used space on disk (i.e. compressed). - If the table stores data in memory, returns approximated number of used bytes in memory. -- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it is possible to quickly determine exact number of bytes from checksums for the table on storage, otherwise `NULL` (does not includes any underlying storage). +- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it's possible to quickly determine the exact number of bytes from the part checksums for the table on storage, otherwise `NULL` (does not take underlying storage (if any) into account). - `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ecdf3231997..c50de2ad6bc 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -699,7 +699,7 @@ public: /// Used for: /// - For total_bytes_uncompressed column in system.tables /// - /// Does not takes underlying Storage (if any) into account + /// Does not take underlying Storage (if any) into account virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } /// Number of rows INSERTed since server start. 
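An illustrative query, not part of the patch above: the documented column can be inspected directly, e.g.

    SELECT name, total_bytes, total_bytes_uncompressed
    FROM system.tables
    WHERE database = currentDatabase() AND engine LIKE '%MergeTree%';

For tables where the exact value cannot be derived quickly from part checksums, total_bytes_uncompressed is NULL, as the documentation change above describes.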
From 30b09f4287323c03a39d0e74c0b11c0603ed2813 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 11 Dec 2023 12:15:42 +0000 Subject: [PATCH 034/137] fix some tests --- .../queries/0_stateless/00109_shard_totals_after_having.sql | 3 +++ tests/queries/0_stateless/00119_storage_join.sql | 2 +- .../00184_shard_distributed_group_by_no_merge.reference | 2 +- .../00184_shard_distributed_group_by_no_merge.sql | 2 +- tests/queries/0_stateless/00273_quantiles.sql | 3 +++ .../00410_aggregation_combinators_with_arenas.sql | 4 ++++ tests/queries/0_stateless/00808_not_optimize_predicate.sql | 6 ++++-- .../01244_optimize_distributed_group_by_sharding_key.sql | 3 +++ tests/queries/0_stateless/01563_distributed_query_finish.sh | 2 +- tests/queries/0_stateless/01799_long_uniq_theta_sketch.sql | 2 +- .../0_stateless/02496_remove_redundant_sorting.reference | 2 +- tests/queries/0_stateless/02496_remove_redundant_sorting.sh | 2 +- .../02496_remove_redundant_sorting_analyzer.reference | 2 +- .../0_stateless/02500_remove_redundant_distinct.reference | 6 +++--- .../queries/0_stateless/02500_remove_redundant_distinct.sh | 4 ++-- .../02500_remove_redundant_distinct_analyzer.reference | 6 +++--- tests/queries/0_stateless/02567_and_consistency.sql | 4 ++-- 17 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/00109_shard_totals_after_having.sql b/tests/queries/0_stateless/00109_shard_totals_after_having.sql index b17accc0dae..dce265e0552 100644 --- a/tests/queries/0_stateless/00109_shard_totals_after_having.sql +++ b/tests/queries/0_stateless/00109_shard_totals_after_having.sql @@ -4,6 +4,9 @@ SET max_rows_to_group_by = 100000; SET max_block_size = 100001; SET group_by_overflow_mode = 'any'; +-- Settings 'max_rows_to_group_by' and 'max_bytes_before_external_group_by' are mutually exclusive. 
+SET max_bytes_before_external_group_by = 0; + DROP TABLE IF EXISTS numbers500k; CREATE TABLE numbers500k (number UInt32) ENGINE = TinyLog; diff --git a/tests/queries/0_stateless/00119_storage_join.sql b/tests/queries/0_stateless/00119_storage_join.sql index 2569a64d2c3..cd255cdfe24 100644 --- a/tests/queries/0_stateless/00119_storage_join.sql +++ b/tests/queries/0_stateless/00119_storage_join.sql @@ -12,7 +12,7 @@ SELECT x, s, k FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LE SELECT 1, x, 2, s, 3, k, 4 FROM (SELECT number AS k FROM system.numbers LIMIT 10) js1 ANY LEFT JOIN t2 USING k; SELECT t1.k, t1.s, t2.x -FROM ( SELECT number AS k, 'a' AS s FROM numbers(2) GROUP BY number WITH TOTALS ) AS t1 +FROM ( SELECT number AS k, 'a' AS s FROM numbers(2) GROUP BY number WITH TOTALS ORDER BY number) AS t1 ANY LEFT JOIN t2 AS t2 USING(k); DROP TABLE t2; diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference index b2b0b43e490..72828aae5a9 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference @@ -36,9 +36,9 @@ GROUP BY ORDER BY 1 GROUP BY w/ ALIAS 0 -1 0 1 +1 ORDER BY w/ ALIAS 0 func(aggregate function) GROUP BY diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql index 422f4a010f1..1bd6cbe8948 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql @@ -34,7 +34,7 @@ SELECT uniq(number) u FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184 -- cover possible tricky issues SELECT 'GROUP BY w/ ALIAS'; -SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) GROUP BY number AS n SETTINGS distributed_group_by_no_merge=2; +SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) GROUP BY number AS n ORDER BY n SETTINGS distributed_group_by_no_merge=2; SELECT 'ORDER BY w/ ALIAS'; SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) ORDER BY number AS n LIMIT 1 SETTINGS distributed_group_by_no_merge=2; diff --git a/tests/queries/0_stateless/00273_quantiles.sql b/tests/queries/0_stateless/00273_quantiles.sql index 9fef1f63057..f5b739b8be1 100644 --- a/tests/queries/0_stateless/00273_quantiles.sql +++ b/tests/queries/0_stateless/00273_quantiles.sql @@ -8,4 +8,7 @@ SELECT quantilesExact(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0 SELECT quantilesTDigest(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +-- The result slightly differs but it's ok since `quantilesDeterministic` is an approximate function. 
+SET max_bytes_before_external_group_by = 0; + SELECT round(1000000 / (number + 1)) AS k, count() AS c, arrayMap(x -> round(x, 6), quantilesDeterministic(0.1, 0.5, 0.9)(number, intHash64(number))) AS q1, quantilesExact(0.1, 0.5, 0.9)(number) AS q2 FROM (SELECT number FROM system.numbers LIMIT 1000000) GROUP BY k ORDER BY k; diff --git a/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql b/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql index 67f5cc54afd..a3abbb9fd58 100644 --- a/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql +++ b/tests/queries/0_stateless/00410_aggregation_combinators_with_arenas.sql @@ -7,4 +7,8 @@ DROP TABLE IF EXISTS arena; SELECT length(arrayReduce('groupUniqArray', [[1, 2], [1], emptyArrayUInt8(), [1], [1, 2]])); SELECT min(x), max(x) FROM (SELECT length(arrayReduce('groupUniqArray', [hex(number), hex(number+1), hex(number)])) AS x FROM system.numbers LIMIT 100000); + +-- Disable external aggregation because the state is reset for each new block of data in 'runningAccumulate' function. +SET max_bytes_before_external_group_by = 0; + SELECT sum(length(runningAccumulate(x))) FROM (SELECT groupUniqArrayState(toString(number % 10)) AS x, number FROM (SELECT * FROM system.numbers LIMIT 11) GROUP BY number ORDER BY number); diff --git a/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/tests/queries/0_stateless/00808_not_optimize_predicate.sql index ba8f5eb5753..d0dda14e026 100644 --- a/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -48,7 +48,8 @@ SELECT intDiv(number, 25) AS n, avgState(number) AS s FROM numbers(2500) -GROUP BY n; +GROUP BY n +ORDER BY n; SET force_primary_key = 1, enable_optimize_predicate_expression = 1; @@ -60,7 +61,8 @@ FROM finalizeAggregation(s) FROM test_00808_push_down_with_finalizeAggregation ) -WHERE (n >= 2) AND (n <= 5); +WHERE (n >= 2) AND (n <= 5) +ORDER BY n; EXPLAIN SYNTAX SELECT * FROM diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 291910ed43f..0d24b238d64 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -4,6 +4,9 @@ set optimize_distributed_group_by_sharding_key=1; +-- Some queries in this test require sorting after aggregation. 
+set max_bytes_before_external_group_by = 0; + drop table if exists dist_01247; drop table if exists data_01247; diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index b49042ead9d..0019c714e40 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -28,7 +28,7 @@ opts=( "--prefer_localhost_replica=0" ) $CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < all ORDER BY clauses will be removed in subqueries" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b2ac9e4533b..16d3327b9c2 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -113,7 +113,7 @@ FROM ) ORDER BY number DESC ) AS t2 -ORDER BY number +ORDER BY t1.number, t2.number -- explain Expression (Project names) Sorting (Sorting for ORDER BY) diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 5348d407097..d7623cd5541 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -73,7 +73,7 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y -ORDER BY n +ORDER BY x.n, y.n -- explain Expression (Projection) Distinct @@ -155,7 +155,7 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr -ORDER BY arr +ORDER BY c1, arr -- explain Expression (Projection) Distinct @@ -169,9 +169,9 @@ Expression (Projection) Expression (Before ORDER BY) ReadFromStorage (Values) -- execute +Goodbye Hello World -Goodbye -- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs -- query SELECT DISTINCT * diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index f83fcff07c1..c4f0994cd13 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -60,7 +60,7 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y -ORDER BY n" +ORDER BY x.n, y.n" run_query "$query" echo "-- DISTINCT duplicates with several columns" @@ -99,7 +99,7 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr -ORDER BY arr" +ORDER BY c1, arr" run_query "$query" echo "-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index 798191db7e4..b79f6310166 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -74,7 +74,7 @@ FROM SELECT DISTINCT number AS n FROM numbers(2) ) as y -ORDER BY n +ORDER BY x.n, y.n -- explain Expression (Project names) Distinct (DISTINCT) @@ -157,7 +157,7 @@ FROM FROM VALUES('Hello', 'World', 'Goodbye') ) AS words ARRAY JOIN [0, 1] AS arr -ORDER BY arr +ORDER BY c1, arr -- explain Expression (Project names) Distinct (DISTINCT) @@ -172,9 
+172,9 @@ Expression (Project names) Expression ((Projection + Change column names to column identifiers)) ReadFromStorage (Values) -- execute +Goodbye Hello World -Goodbye -- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs -- query SELECT DISTINCT * diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql index 8ad06bd68cb..ca0c0e8ab77 100644 --- a/tests/queries/0_stateless/02567_and_consistency.sql +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -76,10 +76,10 @@ GROUP BY g; select '='; SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null FROM t2 AS left - GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 0; + GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 0; select '='; SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null FROM t2 AS left - GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 1; + GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 1; DROP TABLE IF EXISTS t2; From 01c10e10857efcdd649433b968360699442a1918 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 11 Dec 2023 17:08:01 +0000 Subject: [PATCH 035/137] fix some tests --- .../00133_long_shard_memory_tracker_and_exception_safety.sh | 2 +- tests/queries/0_stateless/00953_moving_functions.sql | 4 ++++ tests/queries/0_stateless/01193_metadata_loading.sh | 2 +- .../01513_optimize_aggregation_in_order_memory_long.sql | 1 + .../0_stateless/01514_distributed_cancel_query_on_error.sh | 1 + tests/queries/0_stateless/01591_window_functions.sql | 4 ++++ tests/queries/0_stateless/02352_rwlock.sh | 4 ++-- 7 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index 389a2cd9684..e6e82e8c976 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -15,7 +15,7 @@ if [ -n "$DBMS_TESTS_UNDER_VALGRIND" ]; then STEP_MULTIPLIER=1000 fi -for i in $(seq 1000000 $((20000 * $STEP_MULTIPLIER)) 10000000 && seq 10100000 $((100000 * $STEP_MULTIPLIER)) 50000000); do +for i in $(seq 1000000 $((20000 * $STEP_MULTIPLIER)) 10000000 && seq 10100000 $((100000 * $STEP_MULTIPLIER)) 100000000); do $CLICKHOUSE_CLIENT --max_memory_usage="$i" --query=" SELECT intDiv(number, 5) AS k, max(toString(number)) FROM remote('127.0.0.{2,3}', ${CLICKHOUSE_DATABASE}.numbers_100k) GROUP BY k ORDER BY k LIMIT 1; " 2> /dev/null; diff --git a/tests/queries/0_stateless/00953_moving_functions.sql b/tests/queries/0_stateless/00953_moving_functions.sql index daaceeeb3ac..b9046158a16 100644 --- a/tests/queries/0_stateless/00953_moving_functions.sql +++ b/tests/queries/0_stateless/00953_moving_functions.sql @@ -24,6 +24,10 @@ INSERT INTO moving_sum_num SELECT * FROM moving_sum_num ORDER BY k,dt FORMAT TabSeparatedWithNames; +-- Result of function 
'groupArrayMovingSum' depends on the order of merging +-- aggregate states which is implementation defined in external aggregation. +SET max_bytes_before_external_group_by = 0; + SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k FORMAT TabSeparatedWithNamesAndTypes; SELECT k, groupArrayMovingSum(3)(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 50425eae018..c25cdf4e970 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -29,7 +29,7 @@ create_tables() { groupArray( create1 || toString(number) || create2 || engines[1 + number % length(engines)] || ';\n' || insert1 || toString(number) || insert2 - ), ';\n') FROM numbers($tables) FORMAT TSVRaw;" | $CLICKHOUSE_CLIENT -nm + ), ';\n') FROM numbers($tables) SETTINGS max_bytes_before_external_group_by = 0 FORMAT TSVRaw;" | $CLICKHOUSE_CLIENT -nm } $CLICKHOUSE_CLIENT -q "CREATE DATABASE $db" diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 3d57518d0f4..b107af07194 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -12,6 +12,7 @@ optimize table data_01513 final; set max_memory_usage='500M'; set max_threads=1; set max_block_size=500; +set max_bytes_before_external_group_by=0; select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241 } select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1; diff --git a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh index 99025890cb3..edf3683ccba 100755 --- a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh +++ b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh @@ -15,6 +15,7 @@ opts=( "--max_block_size=50" "--max_threads=1" "--max_distributed_connections=2" + "--max_bytes_before_external_group_by=0" ) ${CLICKHOUSE_CLIENT} "${opts[@]}" -q "SELECT groupArray(repeat('a', if(_shard_num == 2, 100000, 1))), number%100000 k from remote('127.{2,3}', system.numbers) GROUP BY k LIMIT 10e6" |& { # the query should fail earlier on 127.3 and 127.2 should not even go to the memory limit exceeded error. 
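A minimal sketch, not from the patch, of why the tests above pin max_bytes_before_external_group_by: with spilling enabled, a GROUP BY that would otherwise exceed max_memory_usage can write its state to disk and succeed, so a test that expects MEMORY_LIMIT_EXCEEDED (code 241) must disable spilling first. The values here are only indicative:

    SET max_memory_usage = '500M';
    SET max_bytes_before_external_group_by = 0;  -- never spill, so the limit is actually enforced
    SELECT number AS k, groupArray(repeat('a', 200))
    FROM numbers(3000000)
    GROUP BY k
    FORMAT Null;  -- expected to fail with code 241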
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 07e323b3c40..952a66616a9 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -2,6 +2,10 @@ SET allow_experimental_analyzer = 1; +-- Too slow +SET max_bytes_before_external_sort = 0; +SET max_bytes_before_external_group_by = 0; + -- { echo } -- just something basic diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7de2c7089b8..08551794c2e 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -21,7 +21,7 @@ function wait_query_by_id_started() # wait for query to be started while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do if [ "$( - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT --max_bytes_before_external_group_by 0 -nm -q " system flush logs; select count() from system.query_log @@ -56,7 +56,7 @@ while :; do insert_query_id="insert-$(random_str 10)" # 20 seconds sleep - $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & + $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --max_bytes_before_external_group_by 0 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & if ! wait_query_by_id_started "$insert_query_id"; then wait continue From cd65e6b60c798e6074fac914fe8c6ec09c60fa5c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 11 Dec 2023 21:05:29 +0000 Subject: [PATCH 036/137] fix some tests --- tests/queries/0_stateless/01134_max_rows_to_group_by.sql | 3 +++ tests/queries/0_stateless/01710_aggregate_projections.sh | 2 ++ .../0_stateless/02428_combinators_with_over_statement.sql | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01134_max_rows_to_group_by.sql b/tests/queries/0_stateless/01134_max_rows_to_group_by.sql index bfbc499e1c3..f9ea37cb65a 100644 --- a/tests/queries/0_stateless/01134_max_rows_to_group_by.sql +++ b/tests/queries/0_stateless/01134_max_rows_to_group_by.sql @@ -2,6 +2,9 @@ SET max_block_size = 1; SET max_rows_to_group_by = 10; SET group_by_overflow_mode = 'throw'; +-- Settings 'max_rows_to_group_by' and 'max_bytes_before_external_group_by' are mutually exclusive. +SET max_bytes_before_external_group_by = 0; + SELECT 'test1', number FROM system.numbers GROUP BY number; -- { serverError 158 } SET group_by_overflow_mode = 'break'; diff --git a/tests/queries/0_stateless/01710_aggregate_projections.sh b/tests/queries/0_stateless/01710_aggregate_projections.sh index 326a564a208..7ea40365937 100755 --- a/tests/queries/0_stateless/01710_aggregate_projections.sh +++ b/tests/queries/0_stateless/01710_aggregate_projections.sh @@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# Number of read rows depends on max_bytes_before_external_group_by. 
+CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --max_bytes_before_external_group_by 0" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_agg_proj (x Int32, y Int32, PROJECTION x_plus_y (SELECT sum(x - y), argMax(x, y) group by x + y)) ENGINE = MergeTree ORDER BY tuple() settings index_granularity = 1" $CLICKHOUSE_CLIENT -q "insert into test_agg_proj select intDiv(number, 2), -intDiv(number,3) - 1 from numbers(100)" diff --git a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql index b42066cdf52..7946b997b00 100644 --- a/tests/queries/0_stateless/02428_combinators_with_over_statement.sql +++ b/tests/queries/0_stateless/02428_combinators_with_over_statement.sql @@ -1,6 +1,6 @@ drop table if exists test; create table test (x AggregateFunction(uniq, UInt64), y Int64) engine=Memory; -insert into test select uniqState(number) as x, number as y from numbers(10) group by number; +insert into test select uniqState(number) as x, number as y from numbers(10) group by number order by x, y; select uniqStateMap(map(1, x)) OVER (PARTITION BY y) from test; select uniqStateForEach([x]) OVER (PARTITION BY y) from test; select uniqStateResample(30, 75, 30)([x], 30) OVER (PARTITION BY y) from test; From e34c13b9d9449c1b2fd6691834e7b448c4674727 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Wed, 22 Nov 2023 17:09:54 +0800 Subject: [PATCH 037/137] Consider lightweight deleted rows when selecting parts to merge --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 96 +++++++++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 14 +++ .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 11 ++- .../MergeTree/MergeTreeDataMergerMutator.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 4 +- .../02942_consider_lwd_when_merge.reference | 3 + .../02942_consider_lwd_when_merge.sql | 23 +++++ 11 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02942_consider_lwd_when_merge.reference create mode 100644 tests/queries/0_stateless/02942_consider_lwd_when_merge.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 38ecd8f4067..d5e1cf25188 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -593,6 +593,23 @@ UInt64 IMergeTreeDataPart::getMarksCount() const return index_granularity.getMarksCount(); } +UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const +{ + if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !rows_count + || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge) + return bytes_on_disk; + + /// Uninitialized existing_rows_count + /// (if existing_rows_count equals rows_count, it means that previously we failed to read existing_rows_count) + if (existing_rows_count > rows_count) + readExistingRowsCount(); + + if (existing_rows_count < rows_count) + return bytes_on_disk * existing_rows_count / rows_count; + else /// Load failed + return bytes_on_disk; +} + size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const { auto checksum = checksums.files.find(file_name); @@ -1285,6 +1302,85 @@ void IMergeTreeDataPart::loadRowsCount() } } +void IMergeTreeDataPart::readExistingRowsCount() const +{ + if (!supportLightweightDeleteMutate() || 
!hasLightweightDelete() || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge + || existing_rows_count < rows_count || !getMarksCount()) + return; + + std::lock_guard lock(existing_rows_count_mutex); + + /// Already read by another thread + if (existing_rows_count < rows_count) + return; + + NamesAndTypesList cols; + cols.push_back(LightweightDeleteDescription::FILTER_COLUMN); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, getMarksCount())}, + nullptr, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + if (!reader) + { + LOG_WARNING(storage.log, "Create reader failed while reading existing rows count"); + existing_rows_count = rows_count; + return; + } + + size_t current_mark = 0; + const size_t total_mark = getMarksCount(); + + bool continue_reading = false; + size_t current_row = 0; + size_t existing_count = 0; + + while (current_row < rows_count) + { + size_t rows_to_read = index_granularity.getMarkRows(current_mark); + continue_reading = (current_mark != 0); + + Columns result; + result.resize(1); + + size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result); + if (!rows_read) + { + LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name); + existing_rows_count = rows_count; + return; + } + + current_row += rows_read; + current_mark += (rows_to_read == rows_read); + + const ColumnUInt8 * row_exists_col = typeid_cast(result[0].get()); + if (!row_exists_col) + { + LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name); + existing_rows_count = rows_count; + return; + } + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + } + + existing_rows_count = existing_count; + LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_rows_count); +} + void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files) { files.push_back("count.txt"); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 06e0712646a..349f58da7c1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -229,6 +229,13 @@ public: size_t rows_count = 0; + /// Existing rows count (excluding lightweight deleted rows) + /// UINT64_MAX -> uninitialized + /// 0 -> all rows were deleted + /// if reading failed, it will be set to rows_count + mutable size_t existing_rows_count = UINT64_MAX; + mutable std::mutex existing_rows_count_mutex; + time_t modification_time = 0; /// When the part is removed from the working set. Changes once. 
mutable std::atomic remove_time { std::numeric_limits::max() }; @@ -372,6 +379,10 @@ public: UInt64 getBytesOnDisk() const { return bytes_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } + /// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true + /// Otherwise returns bytes_on_disk + UInt64 getExistingBytesOnDisk() const; + size_t getFileSizeOrZero(const String & file_name) const; auto getFilesChecksums() const { return checksums.files; } @@ -498,6 +509,9 @@ public: /// True if here is lightweight deleted mask file in part. bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); } + /// Read existing rows count from _row_exists column + void readExistingRowsCount() const; + void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); /// Checks the consistency of this data part. diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 3d8bc62b5cc..975cffbed9f 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -160,7 +160,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() } /// Start to make the main work - size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); + size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true); /// Can throw an exception while reserving space. IMergeTreeDataPart::TTLInfos ttl_infos; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f78b383e173..042a6cce24c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo } IMergeSelector::Part part_info; - part_info.size = part->getBytesOnDisk(); + part_info.size = part->getExistingBytesOnDisk(); part_info.age = res.current_time - part->modification_time; part_info.level = part->info.level; part_info.data = ∂ @@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti return SelectPartsDecision::CANNOT_SELECT; } - sum_bytes += (*it)->getBytesOnDisk(); + sum_bytes += (*it)->getExistingBytesOnDisk(); prev_it = it; ++it; @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,10 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - res += part->getBytesOnDisk(); + if (is_merge && part->storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge) + res += part->getExistingBytesOnDisk(); + else + res += part->getBytesOnDisk(); } return static_cast(res * DISK_USAGE_COEFFICIENT_TO_RESERVE); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 6eab0ee0c37..4cc9ea170f3 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge); private: /** Select all parts belonging to the same partition. diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5bb712ea786..d9b996b36ca 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -73,6 +73,7 @@ struct Settings; M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "If true, estimated size (excluding lightweight deleted rows) will be used as source part size when selecting parts to merge", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index a9ff687fe4d..620b0e34c6a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } /// TODO - some better heuristic? 
- size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}); + size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false); if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) && estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index bb74c4dd7bb..c45abb282a0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1349,7 +1349,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (auto part_in_memory = asInMemoryPart(part)) sum_parts_size_in_bytes += part_in_memory->block.bytes(); else - sum_parts_size_in_bytes += part->getBytesOnDisk(); + sum_parts_size_in_bytes += part->getExistingBytesOnDisk(); } } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e9a0dd5fbf3..d1192bbfbd9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1062,7 +1062,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (isTTLMergeType(future_part->merge_type)) getContext()->getMergeList().bookMergeWithTTL(); - merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false); + merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false); return std::make_shared(future_part, std::move(merging_tagger), std::make_shared()); } @@ -1279,7 +1279,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->name = part->getNewName(new_part_info); future_part->part_format = part->getFormat(); - tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); + tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); return std::make_shared(future_part, std::move(tagger), commands, txn); } } diff --git a/tests/queries/0_stateless/02942_consider_lwd_when_merge.reference b/tests/queries/0_stateless/02942_consider_lwd_when_merge.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/02942_consider_lwd_when_merge.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/02942_consider_lwd_when_merge.sql b/tests/queries/0_stateless/02942_consider_lwd_when_merge.sql new file mode 100644 index 00000000000..a65e8877020 --- /dev/null +++ b/tests/queries/0_stateless/02942_consider_lwd_when_merge.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS lwd_merge; + +CREATE TABLE lwd_merge (id UInt64 CODEC(NONE)) + ENGINE = MergeTree ORDER BY id +SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0; + +INSERT INTO lwd_merge SELECT number FROM numbers(10000); +INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DELETE FROM lwd_merge WHERE id % 10 > 0; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = 
currentDatabase() AND table = 'lwd_merge' AND active = 1; + +ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DROP TABLE IF EXISTS lwd_merge; From 0b89fbbdd92af06045f021f84552996069208b82 Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Tue, 12 Dec 2023 16:49:58 +0800 Subject: [PATCH 038/137] fix style issue --- src/Storages/IStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index c50de2ad6bc..1693d0e073b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -701,7 +701,7 @@ public: /// /// Does not take underlying Storage (if any) into account virtual std::optional totalBytesUncompressed(const Settings &) const { return {}; } - + /// Number of rows INSERTed since server start. /// /// Does not take the underlying Storage (if any) into account. From f805956661c5e5513eae0c825f3bbd980661d6ee Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Tue, 12 Dec 2023 20:29:51 +0800 Subject: [PATCH 039/137] fix test --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 9ed905a0df8..0f0f91f5494 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -1086,6 +1086,7 @@ CREATE TABLE system.tables `storage_policy` String, `total_rows` Nullable(UInt64), `total_bytes` Nullable(UInt64), + `total_bytes_uncompressed` Nullable(UInt64), `parts` Nullable(UInt64), `active_parts` Nullable(UInt64), `total_marks` Nullable(UInt64), From 3d51192bada7bd71d8eb880b549fe2f7549c2ee7 Mon Sep 17 00:00:00 2001 From: Natalya Chizhonkova Date: Tue, 12 Dec 2023 17:03:49 +0300 Subject: [PATCH 040/137] Refactoring for UserDefinedSQLObjectsLoader --- programs/local/LocalServer.cpp | 4 +- programs/server/Server.cpp | 4 +- .../IUserDefinedSQLObjectsLoader.h | 47 ----- .../IUserDefinedSQLObjectsStorage.h | 74 +++++++ .../UserDefinedSQLFunctionFactory.cpp | 133 +++--------- .../UserDefinedSQLFunctionFactory.h | 16 +- .../UserDefinedSQLObjectsBackup.cpp | 12 +- ...p => UserDefinedSQLObjectsDiskStorage.cpp} | 42 ++-- ...k.h => UserDefinedSQLObjectsDiskStorage.h} | 19 +- .../UserDefinedSQLObjectsStorageBase.cpp | 190 ++++++++++++++++++ .../UserDefinedSQLObjectsStorageBase.h | 69 +++++++ ...UserDefinedSQLObjectsZooKeeperStorage.cpp} | 72 +++---- ...> UserDefinedSQLObjectsZooKeeperStorage.h} | 21 +- .../createUserDefinedSQLObjectsLoader.h | 12 -- ...=> createUserDefinedSQLObjectsStorage.cpp} | 12 +- .../createUserDefinedSQLObjectsStorage.h | 12 ++ src/Interpreters/Context.cpp | 40 ++-- src/Interpreters/Context.h | 7 +- .../InterpreterCreateFunctionQuery.cpp | 4 +- .../InterpreterDropFunctionQuery.cpp | 4 +- .../removeOnClusterClauseIfNeeded.cpp | 4 +- .../test.py | 4 +- 22 files changed, 508 insertions(+), 294 deletions(-) delete mode 100644 src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h create mode 100644 src/Functions/UserDefined/IUserDefinedSQLObjectsStorage.h rename src/Functions/UserDefined/{UserDefinedSQLObjectsLoaderFromDisk.cpp => UserDefinedSQLObjectsDiskStorage.cpp} (80%) rename src/Functions/UserDefined/{UserDefinedSQLObjectsLoaderFromDisk.h => 
UserDefinedSQLObjectsDiskStorage.h} (65%) create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h rename src/Functions/UserDefined/{UserDefinedSQLObjectsLoaderFromZooKeeper.cpp => UserDefinedSQLObjectsZooKeeperStorage.cpp} (82%) rename src/Functions/UserDefined/{UserDefinedSQLObjectsLoaderFromZooKeeper.h => UserDefinedSQLObjectsZooKeeperStorage.h} (80%) delete mode 100644 src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h rename src/Functions/UserDefined/{createUserDefinedSQLObjectsLoader.cpp => createUserDefinedSQLObjectsStorage.cpp} (61%) create mode 100644 src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index fbb64ea1135..7357c239e6b 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include @@ -757,7 +757,7 @@ void LocalServer::processConfig() } /// For ClickHouse local if path is not set the loader will be disabled. - global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + global_context->getUserDefinedSQLObjectsStorage().loadObjects(); LOG_DEBUG(log, "Loaded metadata."); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 36f0ce90e57..b88bbb37866 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include #include @@ -1716,7 +1716,7 @@ try /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); /// Load user-defined SQL functions. - global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + global_context->getUserDefinedSQLObjectsStorage().loadObjects(); } catch (...) { diff --git a/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h deleted file mode 100644 index 4c7850951b5..00000000000 --- a/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -class IAST; -struct Settings; -enum class UserDefinedSQLObjectType; - -/// Interface for a loader of user-defined SQL objects. -/// Implementations: UserDefinedSQLLoaderFromDisk, UserDefinedSQLLoaderFromZooKeeper -class IUserDefinedSQLObjectsLoader -{ -public: - virtual ~IUserDefinedSQLObjectsLoader() = default; - - /// Whether this loader can replicate SQL objects to another node. - virtual bool isReplicated() const { return false; } - virtual String getReplicationID() const { return ""; } - - /// Loads all objects. Can be called once - if objects are already loaded the function does nothing. - virtual void loadObjects() = 0; - - /// Stops watching. - virtual void stopWatching() {} - - /// Immediately reloads all objects, throws an exception if failed. - virtual void reloadObjects() = 0; - - /// Immediately reloads a specified object only. - virtual void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) = 0; - - /// Stores an object (must be called only by UserDefinedSQLFunctionFactory::registerFunction). 
- virtual bool storeObject( - UserDefinedSQLObjectType object_type, - const String & object_name, - const IAST & create_object_query, - bool throw_if_exists, - bool replace_if_exists, - const Settings & settings) = 0; - - /// Removes an object (must be called only by UserDefinedSQLFunctionFactory::unregisterFunction). - virtual bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) = 0; -}; -} diff --git a/src/Functions/UserDefined/IUserDefinedSQLObjectsStorage.h b/src/Functions/UserDefined/IUserDefinedSQLObjectsStorage.h new file mode 100644 index 00000000000..345ff8c5954 --- /dev/null +++ b/src/Functions/UserDefined/IUserDefinedSQLObjectsStorage.h @@ -0,0 +1,74 @@ +#pragma once + +#include + +#include + +#include + + +namespace DB +{ +class IAST; +struct Settings; +enum class UserDefinedSQLObjectType; + +/// Interface for a storage of user-defined SQL objects. +/// Implementations: UserDefinedSQLObjectsDiskStorage, UserDefinedSQLObjectsZooKeeperStorage +class IUserDefinedSQLObjectsStorage +{ +public: + virtual ~IUserDefinedSQLObjectsStorage() = default; + + /// Whether this loader can replicate SQL objects to another node. + virtual bool isReplicated() const { return false; } + virtual String getReplicationID() const { return ""; } + + /// Loads all objects. Can be called once - if objects are already loaded the function does nothing. + virtual void loadObjects() = 0; + + /// Get object by name. If no object stored with object_name throws exception. + virtual ASTPtr get(const String & object_name) const = 0; + + /// Get object by name. If no object stored with object_name return nullptr. + virtual ASTPtr tryGet(const String & object_name) const = 0; + + /// Check if object with object_name is stored. + virtual bool has(const String & object_name) const = 0; + + /// Get all user defined object names. + virtual std::vector getAllObjectNames() const = 0; + + /// Get all user defined objects. + virtual std::vector> getAllObjects() const = 0; + + /// Check whether any UDFs have been stored. + virtual bool empty() const = 0; + + /// Stops watching. + virtual void stopWatching() {} + + /// Immediately reloads all objects, throws an exception if failed. + virtual void reloadObjects() = 0; + + /// Immediately reloads a specified object only. + virtual void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) = 0; + + /// Stores an object (must be called only by UserDefinedSQLFunctionFactory::registerFunction). + virtual bool storeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + ASTPtr create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) = 0; + + /// Removes an object (must be called only by UserDefinedSQLFunctionFactory::unregisterFunction). 
+ virtual bool removeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) = 0; +}; +} diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index c4a503589eb..e37e4a23b63 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -14,8 +14,6 @@ #include #include -#include - namespace DB { @@ -23,7 +21,6 @@ namespace DB namespace ErrorCodes { extern const int FUNCTION_ALREADY_EXISTS; - extern const int UNKNOWN_FUNCTION; extern const int CANNOT_DROP_FUNCTION; extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; extern const int UNSUPPORTED_METHOD; @@ -130,20 +127,17 @@ bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & c checkCanBeRegistered(context, function_name, *create_function_query); create_function_query = normalizeCreateFunctionQuery(*create_function_query); - std::lock_guard lock{mutex}; - auto it = function_name_to_create_query_map.find(function_name); - if (it != function_name_to_create_query_map.end()) - { - if (throw_if_exists) - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", function_name); - else if (!replace_if_exists) - return false; - } - try { - auto & loader = context->getUserDefinedSQLObjectsLoader(); - bool stored = loader.storeObject(UserDefinedSQLObjectType::Function, function_name, *create_function_query, throw_if_exists, replace_if_exists, context->getSettingsRef()); + auto & loader = context->getUserDefinedSQLObjectsStorage(); + bool stored = loader.storeObject( + context, + UserDefinedSQLObjectType::Function, + function_name, + create_function_query, + throw_if_exists, + replace_if_exists, + context->getSettingsRef()); if (!stored) return false; } @@ -153,7 +147,6 @@ bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & c throw; } - function_name_to_create_query_map[function_name] = create_function_query; return true; } @@ -161,20 +154,14 @@ bool UserDefinedSQLFunctionFactory::unregisterFunction(const ContextMutablePtr & { checkCanBeUnregistered(context, function_name); - std::lock_guard lock(mutex); - auto it = function_name_to_create_query_map.find(function_name); - if (it == function_name_to_create_query_map.end()) - { - if (throw_if_not_exists) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", function_name); - else - return false; - } - try { - auto & loader = context->getUserDefinedSQLObjectsLoader(); - bool removed = loader.removeObject(UserDefinedSQLObjectType::Function, function_name, throw_if_not_exists); + auto & storage = context->getUserDefinedSQLObjectsStorage(); + bool removed = storage.removeObject( + context, + UserDefinedSQLObjectType::Function, + function_name, + throw_if_not_exists); if (!removed) return false; } @@ -184,61 +171,41 @@ bool UserDefinedSQLFunctionFactory::unregisterFunction(const ContextMutablePtr & throw; } - function_name_to_create_query_map.erase(function_name); return true; } ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const { - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query_map.find(function_name); - if (it == function_name_to_create_query_map.end()) - throw 
Exception(ErrorCodes::UNKNOWN_FUNCTION, - "The function name '{}' is not registered", - function_name); - - return it->second; + return global_context->getUserDefinedSQLObjectsStorage().get(function_name); } ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const { - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query_map.find(function_name); - if (it == function_name_to_create_query_map.end()) - return nullptr; - - return it->second; + return global_context->getUserDefinedSQLObjectsStorage().tryGet(function_name); } bool UserDefinedSQLFunctionFactory::has(const String & function_name) const { - return tryGet(function_name) != nullptr; + return global_context->getUserDefinedSQLObjectsStorage().has(function_name); } std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const { - std::vector registered_names; - - std::lock_guard lock(mutex); - registered_names.reserve(function_name_to_create_query_map.size()); - - for (const auto & [name, _] : function_name_to_create_query_map) - registered_names.emplace_back(name); - - return registered_names; + return global_context->getUserDefinedSQLObjectsStorage().getAllObjectNames(); } bool UserDefinedSQLFunctionFactory::empty() const { - std::lock_guard lock(mutex); - return function_name_to_create_query_map.empty(); + return global_context->getUserDefinedSQLObjectsStorage().empty(); } void UserDefinedSQLFunctionFactory::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const { - backupUserDefinedSQLObjects(backup_entries_collector, data_path_in_backup, UserDefinedSQLObjectType::Function, getAllFunctions()); + backupUserDefinedSQLObjects( + backup_entries_collector, + data_path_in_backup, + UserDefinedSQLObjectType::Function, + global_context->getUserDefinedSQLObjectsStorage().getAllObjects()); } void UserDefinedSQLFunctionFactory::restore(RestorerFromBackup & restorer, const String & data_path_in_backup) @@ -252,52 +219,4 @@ void UserDefinedSQLFunctionFactory::restore(RestorerFromBackup & restorer, const registerFunction(context, function_name, create_function_query, throw_if_exists, replace_if_exists); } -void UserDefinedSQLFunctionFactory::setAllFunctions(const std::vector> & new_functions) -{ - std::unordered_map normalized_functions; - for (const auto & [function_name, create_query] : new_functions) - normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); - - std::lock_guard lock(mutex); - function_name_to_create_query_map = std::move(normalized_functions); -} - -std::vector> UserDefinedSQLFunctionFactory::getAllFunctions() const -{ - std::lock_guard lock{mutex}; - std::vector> all_functions; - all_functions.reserve(function_name_to_create_query_map.size()); - std::copy(function_name_to_create_query_map.begin(), function_name_to_create_query_map.end(), std::back_inserter(all_functions)); - return all_functions; -} - -void UserDefinedSQLFunctionFactory::setFunction(const String & function_name, const IAST & create_function_query) -{ - std::lock_guard lock(mutex); - function_name_to_create_query_map[function_name] = normalizeCreateFunctionQuery(create_function_query); -} - -void UserDefinedSQLFunctionFactory::removeFunction(const String & function_name) -{ - std::lock_guard lock(mutex); - function_name_to_create_query_map.erase(function_name); -} - -void UserDefinedSQLFunctionFactory::removeAllFunctionsExcept(const Strings & function_names_to_keep) -{ - boost::container::flat_set 
names_set_to_keep{function_names_to_keep.begin(), function_names_to_keep.end()}; - std::lock_guard lock(mutex); - for (auto it = function_name_to_create_query_map.begin(); it != function_name_to_create_query_map.end();) - { - auto current = it++; - if (!names_set_to_keep.contains(current->first)) - function_name_to_create_query_map.erase(current); - } -} - -std::unique_lock UserDefinedSQLFunctionFactory::getLock() const -{ - return std::unique_lock{mutex}; -} - } diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h index a7d586061b2..b1f3940323a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace DB @@ -48,23 +48,11 @@ public: void restore(RestorerFromBackup & restorer, const String & data_path_in_backup); private: - friend class UserDefinedSQLObjectsLoaderFromDisk; - friend class UserDefinedSQLObjectsLoaderFromZooKeeper; - /// Checks that a specified function can be registered, throws an exception if not. static void checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query); static void checkCanBeUnregistered(const ContextPtr & context, const String & function_name); - /// The following functions must be called only by the loader. - void setAllFunctions(const std::vector> & new_functions); - std::vector> getAllFunctions() const; - void setFunction(const String & function_name, const IAST & create_function_query); - void removeFunction(const String & function_name); - void removeAllFunctionsExcept(const Strings & function_names_to_keep); - std::unique_lock getLock() const; - - std::unordered_map function_name_to_create_query_map; - mutable std::recursive_mutex mutex; + ContextPtr global_context = Context::getGlobalContextInstance(); }; } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp index 6920e8ce2c2..3ec5393fa6f 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,9 +37,9 @@ void backupUserDefinedSQLObjects( escapeForFileName(object_name) + ".sql", std::make_shared(queryToString(create_object_query))); auto context = backup_entries_collector.getContext(); - const auto & loader = context->getUserDefinedSQLObjectsLoader(); + const auto & storage = context->getUserDefinedSQLObjectsStorage(); - if (!loader.isReplicated()) + if (!storage.isReplicated()) { fs::path data_path_in_backup_fs{data_path_in_backup}; for (const auto & [file_name, entry] : backup_entries) @@ -47,7 +47,7 @@ void backupUserDefinedSQLObjects( return; } - String replication_id = loader.getReplicationID(); + String replication_id = storage.getReplicationID(); auto backup_coordination = backup_entries_collector.getBackupCoordination(); backup_coordination->addReplicatedSQLObjectsDir(replication_id, object_type, data_path_in_backup); @@ -80,9 +80,9 @@ std::vector> restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type) { auto context = restorer.getContext(); - const auto & loader = context->getUserDefinedSQLObjectsLoader(); + const auto & storage = context->getUserDefinedSQLObjectsStorage(); - if 
(loader.isReplicated() && !restorer.getRestoreCoordination()->acquireReplicatedSQLObjects(loader.getReplicationID(), object_type)) + if (storage.isReplicated() && !restorer.getRestoreCoordination()->acquireReplicatedSQLObjects(storage.getReplicationID(), object_type)) return {}; /// Other replica is already restoring user-defined SQL objects. auto backup = restorer.getBackup(); diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp similarity index 80% rename from src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp rename to src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index d67c48f166d..271c464e79a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -1,4 +1,4 @@ -#include "Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h" +#include "Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h" #include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" #include "Functions/UserDefined/UserDefinedSQLObjectType.h" @@ -51,7 +51,7 @@ namespace } } -UserDefinedSQLObjectsLoaderFromDisk::UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_) +UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const ContextPtr & global_context_, const String & dir_path_) : global_context(global_context_) , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{&Poco::Logger::get("UserDefinedSQLObjectsLoaderFromDisk")} @@ -60,13 +60,13 @@ UserDefinedSQLObjectsLoaderFromDisk::UserDefinedSQLObjectsLoaderFromDisk(const C } -ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name) +ASTPtr UserDefinedSQLObjectsDiskStorage::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name) { return tryLoadObject(object_type, object_name, getFilePath(object_type, object_name), /* check_file_exists= */ true); } -ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & path, bool check_file_exists) +ASTPtr UserDefinedSQLObjectsDiskStorage::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & path, bool check_file_exists) { LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(object_name), path); @@ -93,7 +93,6 @@ ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectTy "", 0, global_context->getSettingsRef().max_parser_depth); - UserDefinedSQLFunctionFactory::checkCanBeRegistered(global_context, object_name, *ast); return ast; } } @@ -106,20 +105,20 @@ ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectTy } -void UserDefinedSQLObjectsLoaderFromDisk::loadObjects() +void UserDefinedSQLObjectsDiskStorage::loadObjects() { if (!objects_loaded) loadObjectsImpl(); } -void UserDefinedSQLObjectsLoaderFromDisk::reloadObjects() +void UserDefinedSQLObjectsDiskStorage::reloadObjects() { loadObjectsImpl(); } -void UserDefinedSQLObjectsLoaderFromDisk::loadObjectsImpl() +void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() { LOG_INFO(log, "Loading user defined objects from {}", dir_path); createDirectory(); @@ -148,26 +147,25 @@ void UserDefinedSQLObjectsLoaderFromDisk::loadObjectsImpl() function_names_and_queries.emplace_back(function_name, ast); } - 
UserDefinedSQLFunctionFactory::instance().setAllFunctions(function_names_and_queries); + setAllObjects(function_names_and_queries); objects_loaded = true; LOG_DEBUG(log, "User defined objects loaded"); } -void UserDefinedSQLObjectsLoaderFromDisk::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) +void UserDefinedSQLObjectsDiskStorage::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) { createDirectory(); auto ast = tryLoadObject(object_type, object_name); - auto & factory = UserDefinedSQLFunctionFactory::instance(); if (ast) - factory.setFunction(object_name, *ast); + setObject(object_name, *ast); else - factory.removeFunction(object_name); + removeObject(object_name); } -void UserDefinedSQLObjectsLoaderFromDisk::createDirectory() +void UserDefinedSQLObjectsDiskStorage::createDirectory() { std::error_code create_dir_error_code; fs::create_directories(dir_path, create_dir_error_code); @@ -177,10 +175,11 @@ void UserDefinedSQLObjectsLoaderFromDisk::createDirectory() } -bool UserDefinedSQLObjectsLoaderFromDisk::storeObject( +bool UserDefinedSQLObjectsDiskStorage::storeObjectImpl( + const ContextPtr & /*current_context*/, UserDefinedSQLObjectType object_type, const String & object_name, - const IAST & create_object_query, + ASTPtr create_object_query, bool throw_if_exists, bool replace_if_exists, const Settings & settings) @@ -197,7 +196,7 @@ bool UserDefinedSQLObjectsLoaderFromDisk::storeObject( } WriteBufferFromOwnString create_statement_buf; - formatAST(create_object_query, create_statement_buf, false); + formatAST(*create_object_query, create_statement_buf, false); writeChar('\n', create_statement_buf); String create_statement = create_statement_buf.str(); @@ -228,8 +227,11 @@ bool UserDefinedSQLObjectsLoaderFromDisk::storeObject( } -bool UserDefinedSQLObjectsLoaderFromDisk::removeObject( - UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) +bool UserDefinedSQLObjectsDiskStorage::removeObjectImpl( + const ContextPtr & /*current_context*/, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) { String file_path = getFilePath(object_type, object_name); LOG_DEBUG(log, "Removing user defined object {} stored in file {}", backQuote(object_name), file_path); @@ -249,7 +251,7 @@ bool UserDefinedSQLObjectsLoaderFromDisk::removeObject( } -String UserDefinedSQLObjectsLoaderFromDisk::getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const +String UserDefinedSQLObjectsDiskStorage::getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const { String file_path; switch (object_type) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h similarity index 65% rename from src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h rename to src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h index 7b0bb291f42..f0986dbda72 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -9,10 +9,10 @@ namespace DB { /// Loads user-defined sql objects from a specified folder. 
-class UserDefinedSQLObjectsLoaderFromDisk : public IUserDefinedSQLObjectsLoader +class UserDefinedSQLObjectsDiskStorage : public UserDefinedSQLObjectsStorageBase { public: - UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_); + UserDefinedSQLObjectsDiskStorage(const ContextPtr & global_context_, const String & dir_path_); void loadObjects() override; @@ -20,17 +20,22 @@ public: void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) override; - bool storeObject( +private: + bool storeObjectImpl( + const ContextPtr & current_context, UserDefinedSQLObjectType object_type, const String & object_name, - const IAST & create_object_query, + ASTPtr create_object_query, bool throw_if_exists, bool replace_if_exists, const Settings & settings) override; - bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) override; + bool removeObjectImpl( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) override; -private: void createDirectory(); void loadObjectsImpl(); ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name); diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp new file mode 100644 index 00000000000..4f47a46b10d --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -0,0 +1,190 @@ +#include "Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h" + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; +} + +namespace +{ + +ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) +{ + auto ptr = create_function_query.clone(); + auto & res = typeid_cast(*ptr); + res.if_not_exists = false; + res.or_replace = false; + FunctionNameNormalizer().visit(res.function_core.get()); + return ptr; +} + +} + +ASTPtr UserDefinedSQLObjectsStorageBase::get(const String & object_name) const +{ + std::lock_guard lock(mutex); + + auto it = object_name_to_create_object_map.find(object_name); + if (it == object_name_to_create_object_map.end()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "The object name '{}' is not saved", + object_name); + + return it->second; +} + +ASTPtr UserDefinedSQLObjectsStorageBase::tryGet(const std::string & object_name) const +{ + std::lock_guard lock(mutex); + + auto it = object_name_to_create_object_map.find(object_name); + if (it == object_name_to_create_object_map.end()) + return nullptr; + + return it->second; +} + +bool UserDefinedSQLObjectsStorageBase::has(const String & object_name) const +{ + return tryGet(object_name) != nullptr; +} + +std::vector UserDefinedSQLObjectsStorageBase::getAllObjectNames() const +{ + std::vector object_names; + + std::lock_guard lock(mutex); + object_names.reserve(object_name_to_create_object_map.size()); + + for (const auto & [name, _] : object_name_to_create_object_map) + object_names.emplace_back(name); + + return object_names; +} + +bool UserDefinedSQLObjectsStorageBase::empty() const +{ + std::lock_guard lock(mutex); + return object_name_to_create_object_map.empty(); +} + +bool UserDefinedSQLObjectsStorageBase::storeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + ASTPtr create_object_query, + 
bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) +{ + std::lock_guard lock{mutex}; + auto it = object_name_to_create_object_map.find(object_name); + if (it != object_name_to_create_object_map.end()) + { + if (throw_if_exists) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined object '{}' already exists", object_name); + else if (!replace_if_exists) + return false; + } + + bool stored = storeObjectImpl( + current_context, + object_type, + object_name, + create_object_query, + throw_if_exists, + replace_if_exists, + settings); + + if (stored) + object_name_to_create_object_map[object_name] = create_object_query; + + return stored; +} + +bool UserDefinedSQLObjectsStorageBase::removeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) +{ + std::lock_guard lock(mutex); + auto it = object_name_to_create_object_map.find(object_name); + if (it == object_name_to_create_object_map.end()) + { + if (throw_if_not_exists) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined object '{}' doesn't exist", object_name); + else + return false; + } + + bool removed = removeObjectImpl( + current_context, + object_type, + object_name, + throw_if_not_exists); + + if (removed) + object_name_to_create_object_map.erase(object_name); + + return removed; +} + +std::unique_lock UserDefinedSQLObjectsStorageBase::getLock() const +{ + return std::unique_lock{mutex}; +} + +void UserDefinedSQLObjectsStorageBase::setAllObjects(const std::vector> & new_objects) +{ + std::unordered_map normalized_functions; + for (const auto & [function_name, create_query] : new_objects) + normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); + + std::lock_guard lock(mutex); + object_name_to_create_object_map = std::move(normalized_functions); +} + +std::vector> UserDefinedSQLObjectsStorageBase::getAllObjects() const +{ + std::lock_guard lock{mutex}; + std::vector> all_objects; + all_objects.reserve(object_name_to_create_object_map.size()); + std::copy(object_name_to_create_object_map.begin(), object_name_to_create_object_map.end(), std::back_inserter(all_objects)); + return all_objects; +} + +void UserDefinedSQLObjectsStorageBase::setObject(const String & object_name, const IAST & create_object_query) +{ + std::lock_guard lock(mutex); + object_name_to_create_object_map[object_name] = normalizeCreateFunctionQuery(create_object_query); +} + +void UserDefinedSQLObjectsStorageBase::removeObject(const String & object_name) +{ + std::lock_guard lock(mutex); + object_name_to_create_object_map.erase(object_name); +} + +void UserDefinedSQLObjectsStorageBase::removeAllObjectsExcept(const Strings & object_names_to_keep) +{ + boost::container::flat_set names_set_to_keep{object_names_to_keep.begin(), object_names_to_keep.end()}; + std::lock_guard lock(mutex); + for (auto it = object_name_to_create_object_map.begin(); it != object_name_to_create_object_map.end();) + { + auto current = it++; + if (!names_set_to_keep.contains(current->first)) + object_name_to_create_object_map.erase(current); + } +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h new file mode 100644 index 00000000000..cab63a3bfcf --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include + +#include + +#include + +namespace DB +{ + +class 
UserDefinedSQLObjectsStorageBase : public IUserDefinedSQLObjectsStorage +{ +public: + ASTPtr get(const String & object_name) const override; + + ASTPtr tryGet(const String & object_name) const override; + + bool has(const String & object_name) const override; + + std::vector getAllObjectNames() const override; + + std::vector> getAllObjects() const override; + + bool empty() const override; + + bool storeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + ASTPtr create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) override; + + bool removeObject( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) override; + +protected: + virtual bool storeObjectImpl( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + ASTPtr create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) = 0; + + virtual bool removeObjectImpl( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) = 0; + + std::unique_lock getLock() const; + void setAllObjects(const std::vector> & new_objects); + void setObject(const String & object_name, const IAST & create_object_query); + void removeObject(const String & object_name); + void removeAllObjectsExcept(const Strings & object_names_to_keep); + + std::unordered_map object_name_to_create_object_map; + mutable std::recursive_mutex mutex; +}; + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp similarity index 82% rename from src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp rename to src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 29aff666da5..6e5a5338437 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -47,7 +47,7 @@ namespace } -UserDefinedSQLObjectsLoaderFromZooKeeper::UserDefinedSQLObjectsLoaderFromZooKeeper( +UserDefinedSQLObjectsZooKeeperStorage::UserDefinedSQLObjectsZooKeeperStorage( const ContextPtr & global_context_, const String & zookeeper_path_) : global_context{global_context_} , zookeeper_getter{[global_context_]() { return global_context_->getZooKeeper(); }} @@ -66,20 +66,20 @@ UserDefinedSQLObjectsLoaderFromZooKeeper::UserDefinedSQLObjectsLoaderFromZooKeep zookeeper_path = "/" + zookeeper_path; } -UserDefinedSQLObjectsLoaderFromZooKeeper::~UserDefinedSQLObjectsLoaderFromZooKeeper() +UserDefinedSQLObjectsZooKeeperStorage::~UserDefinedSQLObjectsZooKeeperStorage() { SCOPE_EXIT_SAFE(stopWatchingThread()); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::startWatchingThread() +void UserDefinedSQLObjectsZooKeeperStorage::startWatchingThread() { if (!watching_flag.exchange(true)) { - watching_thread = ThreadFromGlobalPool(&UserDefinedSQLObjectsLoaderFromZooKeeper::processWatchQueue, this); + watching_thread = ThreadFromGlobalPool(&UserDefinedSQLObjectsZooKeeperStorage::processWatchQueue, this); } } -void UserDefinedSQLObjectsLoaderFromZooKeeper::stopWatchingThread() +void UserDefinedSQLObjectsZooKeeperStorage::stopWatchingThread() { if (watching_flag.exchange(false)) { @@ -89,7 +89,7 @@ 
void UserDefinedSQLObjectsLoaderFromZooKeeper::stopWatchingThread() } } -zkutil::ZooKeeperPtr UserDefinedSQLObjectsLoaderFromZooKeeper::getZooKeeper() +zkutil::ZooKeeperPtr UserDefinedSQLObjectsZooKeeperStorage::getZooKeeper() { auto [zookeeper, session_status] = zookeeper_getter.getZooKeeper(); @@ -106,18 +106,18 @@ zkutil::ZooKeeperPtr UserDefinedSQLObjectsLoaderFromZooKeeper::getZooKeeper() return zookeeper; } -void UserDefinedSQLObjectsLoaderFromZooKeeper::initZooKeeperIfNeeded() +void UserDefinedSQLObjectsZooKeeperStorage::initZooKeeperIfNeeded() { getZooKeeper(); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::resetAfterError() +void UserDefinedSQLObjectsZooKeeperStorage::resetAfterError() { zookeeper_getter.resetCache(); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::loadObjects() +void UserDefinedSQLObjectsZooKeeperStorage::loadObjects() { /// loadObjects() is called at start from Server::main(), so it's better not to stop here on no connection to ZooKeeper or any other error. /// However the watching thread must be started anyway in case the connection will be established later. @@ -136,7 +136,7 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::loadObjects() } -void UserDefinedSQLObjectsLoaderFromZooKeeper::processWatchQueue() +void UserDefinedSQLObjectsZooKeeperStorage::processWatchQueue() { LOG_DEBUG(log, "Started watching thread"); setThreadName("UserDefObjWatch"); @@ -173,13 +173,13 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::processWatchQueue() } -void UserDefinedSQLObjectsLoaderFromZooKeeper::stopWatching() +void UserDefinedSQLObjectsZooKeeperStorage::stopWatching() { stopWatchingThread(); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::reloadObjects() +void UserDefinedSQLObjectsZooKeeperStorage::reloadObjects() { auto zookeeper = getZooKeeper(); refreshAllObjects(zookeeper); @@ -187,23 +187,24 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::reloadObjects() } -void UserDefinedSQLObjectsLoaderFromZooKeeper::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) +void UserDefinedSQLObjectsZooKeeperStorage::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) { auto zookeeper = getZooKeeper(); refreshObject(zookeeper, object_type, object_name); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper) +void UserDefinedSQLObjectsZooKeeperStorage::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper) { zookeeper->createAncestors(zookeeper_path); zookeeper->createIfNotExists(zookeeper_path, ""); } -bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject( +bool UserDefinedSQLObjectsZooKeeperStorage::storeObjectImpl( + const ContextPtr & /*current_context*/, UserDefinedSQLObjectType object_type, const String & object_name, - const IAST & create_object_query, + ASTPtr create_object_query, bool throw_if_exists, bool replace_if_exists, const Settings &) @@ -212,7 +213,7 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject( LOG_DEBUG(log, "Storing user-defined object {} at zk path {}", backQuote(object_name), path); WriteBufferFromOwnString create_statement_buf; - formatAST(create_object_query, create_statement_buf, false); + formatAST(*create_object_query, create_statement_buf, false); writeChar('\n', create_statement_buf); String create_statement = create_statement_buf.str(); @@ -252,8 +253,11 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject( } -bool UserDefinedSQLObjectsLoaderFromZooKeeper::removeObject( - UserDefinedSQLObjectType object_type, 
const String & object_name, bool throw_if_not_exists) +bool UserDefinedSQLObjectsZooKeeperStorage::removeObjectImpl( + const ContextPtr & /*current_context*/, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) { String path = getNodePath(zookeeper_path, object_type, object_name); LOG_DEBUG(log, "Removing user-defined object {} at zk path {}", backQuote(object_name), path); @@ -276,7 +280,7 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::removeObject( return true; } -bool UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectDataAndSetWatch( +bool UserDefinedSQLObjectsZooKeeperStorage::getObjectDataAndSetWatch( const zkutil::ZooKeeperPtr & zookeeper, String & data, const String & path, @@ -298,7 +302,7 @@ bool UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectDataAndSetWatch( return zookeeper->tryGetWatch(path, data, &entity_stat, object_watcher); } -ASTPtr UserDefinedSQLObjectsLoaderFromZooKeeper::parseObjectData(const String & object_data, UserDefinedSQLObjectType object_type) +ASTPtr UserDefinedSQLObjectsZooKeeperStorage::parseObjectData(const String & object_data, UserDefinedSQLObjectType object_type) { switch (object_type) { @@ -317,7 +321,7 @@ ASTPtr UserDefinedSQLObjectsLoaderFromZooKeeper::parseObjectData(const String & UNREACHABLE(); } -ASTPtr UserDefinedSQLObjectsLoaderFromZooKeeper::tryLoadObject( +ASTPtr UserDefinedSQLObjectsZooKeeperStorage::tryLoadObject( const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type, const String & object_name) { String path = getNodePath(zookeeper_path, object_type, object_name); @@ -343,7 +347,7 @@ ASTPtr UserDefinedSQLObjectsLoaderFromZooKeeper::tryLoadObject( } } -Strings UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectNamesAndSetWatch( +Strings UserDefinedSQLObjectsZooKeeperStorage::getObjectNamesAndSetWatch( const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type) { auto object_list_watcher = [my_watch_queue = watch_queue, object_type](const Coordination::WatchResponse &) @@ -371,7 +375,7 @@ Strings UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectNamesAndSetWatch( return object_names; } -void UserDefinedSQLObjectsLoaderFromZooKeeper::refreshAllObjects(const zkutil::ZooKeeperPtr & zookeeper) +void UserDefinedSQLObjectsZooKeeperStorage::refreshAllObjects(const zkutil::ZooKeeperPtr & zookeeper) { /// It doesn't make sense to keep the old watch events because we will reread everything in this function. 
watch_queue->clear(); @@ -380,7 +384,7 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::refreshAllObjects(const zkutil::Z objects_loaded = true; } -void UserDefinedSQLObjectsLoaderFromZooKeeper::refreshObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type) +void UserDefinedSQLObjectsZooKeeperStorage::refreshObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type) { LOG_DEBUG(log, "Refreshing all user-defined {} objects", object_type); Strings object_names = getObjectNamesAndSetWatch(zookeeper, object_type); @@ -393,21 +397,20 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::refreshObjects(const zkutil::ZooK function_names_and_asts.emplace_back(function_name, ast); } - UserDefinedSQLFunctionFactory::instance().setAllFunctions(function_names_and_asts); + setAllObjects(function_names_and_asts); LOG_DEBUG(log, "All user-defined {} objects refreshed", object_type); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::syncObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type) +void UserDefinedSQLObjectsZooKeeperStorage::syncObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type) { LOG_DEBUG(log, "Syncing user-defined {} objects", object_type); Strings object_names = getObjectNamesAndSetWatch(zookeeper, object_type); - auto & factory = UserDefinedSQLFunctionFactory::instance(); - auto lock = factory.getLock(); + getLock(); /// Remove stale objects - factory.removeAllFunctionsExcept(object_names); + removeAllObjectsExcept(object_names); /// Read & parse only new SQL objects from ZooKeeper for (const auto & function_name : object_names) { @@ -418,16 +421,15 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::syncObjects(const zkutil::ZooKeep LOG_DEBUG(log, "User-defined {} objects synced", object_type); } -void UserDefinedSQLObjectsLoaderFromZooKeeper::refreshObject( +void UserDefinedSQLObjectsZooKeeperStorage::refreshObject( const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type, const String & object_name) { auto ast = tryLoadObject(zookeeper, object_type, object_name); - auto & factory = UserDefinedSQLFunctionFactory::instance(); if (ast) - factory.setFunction(object_name, *ast); + setObject(object_name, *ast); else - factory.removeFunction(object_name); + removeObject(object_name); } } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.h b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h similarity index 80% rename from src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.h rename to src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h index 38e061fd4d9..9f41763c59c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromZooKeeper.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -12,11 +12,11 @@ namespace DB { /// Loads user-defined sql objects from ZooKeeper. 
-class UserDefinedSQLObjectsLoaderFromZooKeeper : public IUserDefinedSQLObjectsLoader +class UserDefinedSQLObjectsZooKeeperStorage : public UserDefinedSQLObjectsStorageBase { public: - UserDefinedSQLObjectsLoaderFromZooKeeper(const ContextPtr & global_context_, const String & zookeeper_path_); - ~UserDefinedSQLObjectsLoaderFromZooKeeper() override; + UserDefinedSQLObjectsZooKeeperStorage(const ContextPtr & global_context_, const String & zookeeper_path_); + ~UserDefinedSQLObjectsZooKeeperStorage() override; bool isReplicated() const override { return true; } String getReplicationID() const override { return zookeeper_path; } @@ -26,16 +26,21 @@ public: void reloadObjects() override; void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) override; - bool storeObject( +private: + bool storeObjectImpl( + const ContextPtr & current_context, UserDefinedSQLObjectType object_type, const String & object_name, - const IAST & create_object_query, + ASTPtr create_object_query, bool throw_if_exists, bool replace_if_exists, const Settings & settings) override; - bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) override; + bool removeObjectImpl( + const ContextPtr & current_context, + UserDefinedSQLObjectType object_type, + const String & object_name, + bool throw_if_not_exists) override; -private: void processWatchQueue(); zkutil::ZooKeeperPtr getZooKeeper(); diff --git a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h deleted file mode 100644 index b3a4623dba3..00000000000 --- a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -class IUserDefinedSQLObjectsLoader; - -std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context); - -} diff --git a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp b/src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.cpp similarity index 61% rename from src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp rename to src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.cpp index b7ebc7abf14..f8847024508 100644 --- a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include #include @@ -17,7 +17,7 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } -std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context) +std::unique_ptr createUserDefinedSQLObjectsStorage(const ContextMutablePtr & global_context) { const String zookeeper_path_key = "user_defined_zookeeper_path"; const String disk_path_key = "user_defined_path"; @@ -33,12 +33,12 @@ std::unique_ptr createUserDefinedSQLObjectsLoader( zookeeper_path_key, disk_path_key); } - return std::make_unique(global_context, config.getString(zookeeper_path_key)); + return std::make_unique(global_context, config.getString(zookeeper_path_key)); } String default_path = fs::path{global_context->getPath()} / "user_defined/"; String path = config.getString(disk_path_key, default_path); - return std::make_unique(global_context, path); + return std::make_unique(global_context, path); } } diff --git a/src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.h 
b/src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.h new file mode 100644 index 00000000000..01659372dec --- /dev/null +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsStorage.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ +class IUserDefinedSQLObjectsStorage; + +std::unique_ptr createUserDefinedSQLObjectsStorage(const ContextMutablePtr & global_context); + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0a8a8f1f529..248b61f6e9b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -65,8 +65,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -253,8 +253,8 @@ struct ContextSharedPart : boost::noncopyable ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex) = nullptr; scope_guard user_defined_executable_functions_xmls TSA_GUARDED_BY(external_user_defined_executable_functions_mutex); - mutable OnceFlag user_defined_sql_objects_loader_initialized; - mutable std::unique_ptr user_defined_sql_objects_loader; + mutable OnceFlag user_defined_sql_objects_storage_initialized; + mutable std::unique_ptr user_defined_sql_objects_storage; #if USE_NLP mutable OnceFlag synonyms_extensions_initialized; @@ -545,7 +545,7 @@ struct ContextSharedPart : boost::noncopyable SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false)); SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false)); - SHUTDOWN(log, "another UDFs loader", user_defined_sql_objects_loader, stopWatching()); + SHUTDOWN(log, "another UDFs storage", user_defined_sql_objects_storage, stopWatching()); LOG_TRACE(log, "Shutting down named sessions"); Session::shutdownNamedSessions(); @@ -572,7 +572,7 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr delete_embedded_dictionaries; std::unique_ptr delete_external_dictionaries_loader; std::unique_ptr delete_external_user_defined_executable_functions_loader; - std::unique_ptr delete_user_defined_sql_objects_loader; + std::unique_ptr delete_user_defined_sql_objects_storage; std::unique_ptr delete_buffer_flush_schedule_pool; std::unique_ptr delete_schedule_pool; std::unique_ptr delete_distributed_schedule_pool; @@ -652,7 +652,7 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries = std::move(embedded_dictionaries); delete_external_dictionaries_loader = std::move(external_dictionaries_loader); delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader); - delete_user_defined_sql_objects_loader = std::move(user_defined_sql_objects_loader); + delete_user_defined_sql_objects_storage = std::move(user_defined_sql_objects_storage); delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool); delete_schedule_pool = std::move(schedule_pool); delete_distributed_schedule_pool = std::move(distributed_schedule_pool); @@ -670,7 +670,7 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries.reset(); delete_external_dictionaries_loader.reset(); delete_external_user_defined_executable_functions_loader.reset(); - delete_user_defined_sql_objects_loader.reset(); + delete_user_defined_sql_objects_storage.reset(); delete_ddl_worker.reset(); delete_buffer_flush_schedule_pool.reset(); delete_schedule_pool.reset(); @@ -2448,24 +2448,30 @@ void 
Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository)); } -const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const +const IUserDefinedSQLObjectsStorage & Context::getUserDefinedSQLObjectsStorage() const { - callOnce(shared->user_defined_sql_objects_loader_initialized, [&] { - shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + callOnce(shared->user_defined_sql_objects_storage_initialized, [&] { + shared->user_defined_sql_objects_storage = createUserDefinedSQLObjectsStorage(getGlobalContext()); }); SharedLockGuard lock(shared->mutex); - return *shared->user_defined_sql_objects_loader; + return *shared->user_defined_sql_objects_storage; } -IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() +IUserDefinedSQLObjectsStorage & Context::getUserDefinedSQLObjectsStorage() { - callOnce(shared->user_defined_sql_objects_loader_initialized, [&] { - shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + callOnce(shared->user_defined_sql_objects_storage_initialized, [&] { + shared->user_defined_sql_objects_storage = createUserDefinedSQLObjectsStorage(getGlobalContext()); }); - SharedLockGuard lock(shared->mutex); - return *shared->user_defined_sql_objects_loader; + std::lock_guard lock(shared->mutex); + return *shared->user_defined_sql_objects_storage; +} + +void Context::setUserDefinedSQLObjectsStorage(std::unique_ptr storage) +{ + std::lock_guard lock(shared->mutex); + shared->user_defined_sql_objects_storage = std::move(storage); } #if USE_NLP diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8c169dd664f..63a919c5f1a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -68,7 +68,7 @@ enum class RowPolicyFilterType; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; -class IUserDefinedSQLObjectsLoader; +class IUserDefinedSQLObjectsStorage; class InterserverCredentials; using InterserverCredentialsPtr = std::shared_ptr; class InterserverIOHandler; @@ -802,8 +802,9 @@ public: const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); - const IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader() const; - IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader(); + const IUserDefinedSQLObjectsStorage & getUserDefinedSQLObjectsStorage() const; + IUserDefinedSQLObjectsStorage & getUserDefinedSQLObjectsStorage(); + void setUserDefinedSQLObjectsStorage(std::unique_ptr storage); void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config); #if USE_NLP diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 3e87f4fe440..b155476fd79 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include @@ -32,7 +32,7 @@ BlockIO InterpreterCreateFunctionQuery::execute() if (!create_function_query.cluster.empty()) { - if (current_context->getUserDefinedSQLObjectsLoader().isReplicated()) + if 
(current_context->getUserDefinedSQLObjectsStorage().isReplicated()) throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because used-defined functions are replicated automatically"); DDLQueryOnClusterParams params; diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index af60d9c5df7..c2cd24044da 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include @@ -32,7 +32,7 @@ BlockIO InterpreterDropFunctionQuery::execute() if (!drop_function_query.cluster.empty()) { - if (current_context->getUserDefinedSQLObjectsLoader().isReplicated()) + if (current_context->getUserDefinedSQLObjectsStorage().isReplicated()) throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because used-defined functions are replicated automatically"); DDLQueryOnClusterParams params; diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 7dc452a0fcb..bee9a54cd0d 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -45,7 +45,7 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c if ((isUserDefinedFunctionQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_udf_queries - && context->getUserDefinedSQLObjectsLoader().isReplicated()) + && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) diff --git a/tests/integration/test_replicated_user_defined_functions/test.py b/tests/integration/test_replicated_user_defined_functions/test.py index f54be21c4c0..e5f6683b90b 100644 --- a/tests/integration/test_replicated_user_defined_functions/test.py +++ b/tests/integration/test_replicated_user_defined_functions/test.py @@ -116,7 +116,7 @@ def test_create_and_replace(): node1.query("CREATE FUNCTION f1 AS (x, y) -> x + y") assert node1.query("SELECT f1(12, 3)") == "15\n" - expected_error = "User-defined function 'f1' already exists" + expected_error = "User-defined object 'f1' already exists" assert expected_error in node1.query_and_get_error( "CREATE FUNCTION f1 AS (x, y) -> x + 2 * y" ) @@ -135,7 +135,7 @@ def test_drop_if_exists(): node1.query("DROP FUNCTION IF EXISTS f1") node1.query("DROP FUNCTION IF EXISTS f1") - expected_error = "User-defined function 'f1' doesn't exist" + expected_error = "User-defined object 'f1' doesn't exist" assert expected_error in node1.query_and_get_error("DROP FUNCTION f1") From 2cd7762a1a859e4a08431d58d0f68ad069142b4e Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Wed, 13 Dec 2023 13:35:02 +0800 Subject: [PATCH 041/137] fix long test issue --- .../00753_system_columns_and_system_tables_long.reference | 2 +- .../0_stateless/00753_system_columns_and_system_tables_long.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference index f834bcede27..dd5860ae491 100644 --- 
a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference @@ -53,4 +53,4 @@ Check total_bytes/total_rows for Join 1 100 Check total_uncompressed_bytes/total_bytes/total_rows for Materialized views 0 0 0 -117 397 1 +1 1 1 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index c21f5c12b2f..51818228913 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -155,6 +155,6 @@ SELECT 'Check total_uncompressed_bytes/total_bytes/total_rows for Materialized v CREATE MATERIALIZED VIEW check_system_tables_mv ENGINE = MergeTree() ORDER BY name2 AS SELECT name1, name2, name3 FROM check_system_tables; SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); INSERT INTO check_system_tables VALUES (1, 1, 1); -SELECT total_bytes_uncompressed, total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); +SELECT total_bytes_uncompressed > 0, total_bytes > 0, total_rows FROM system.tables WHERE name = 'check_system_tables_mv' AND database = currentDatabase(); DROP TABLE check_system_tables_mv; DROP TABLE check_system_tables; From 5121bfcd78f79b7d02576cfe1a01a137bf5c09cc Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 13 Dec 2023 07:16:30 +0000 Subject: [PATCH 042/137] major refactoring --- .../StorageSystemDroppedTablesParts.cpp | 323 ++++-------------- .../System/StorageSystemDroppedTablesParts.h | 20 +- src/Storages/System/StorageSystemParts.h | 2 +- .../System/StorageSystemPartsBase.cpp | 55 +-- src/Storages/System/StorageSystemPartsBase.h | 73 +++- 5 files changed, 145 insertions(+), 328 deletions(-) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index 1770610c7df..ac8c65def8e 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -1,3 +1,6 @@ +#include +#include +#include #include #include #include @@ -15,288 +18,78 @@ #include -namespace -{ - -std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) -{ - switch (state) - { - case DB::DataPartRemovalState::NOT_ATTEMPTED: - return "Cleanup thread hasn't seen this part yet"; - case DB::DataPartRemovalState::VISIBLE_TO_TRANSACTIONS: - return "Part maybe visible for transactions"; - case DB::DataPartRemovalState::NON_UNIQUE_OWNERSHIP: - return "Part ownership is not unique"; - case DB::DataPartRemovalState::NOT_REACHED_REMOVAL_TIME: - return "Part hasn't reached removal time yet"; - case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT: - return "Waiting mutation parent to be removed"; - case DB::DataPartRemovalState::EMPTY_PART_COVERS_OTHER_PARTS: - return "Waiting for covered parts to be removed first"; - case DB::DataPartRemovalState::REMOVED: - return "Part was selected to be removed"; - } -} - -} - namespace DB { -namespace ErrorCodes + +StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + : StoragesInfoStreamBase(context) { - extern const int LOGICAL_ERROR; -} + needsLock = false; -NamesAndTypesList 
StorageSystemDroppedTablesParts::getNamesAndTypes() -{ - NamesAndTypesList names_and_types{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"primary_key_size", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"secondary_indices_compressed_bytes", std::make_shared()}, - {"secondary_indices_uncompressed_bytes", std::make_shared()}, - {"secondary_indices_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + /// Will apply WHERE to subset of columns and then add more columns. + /// This is kind of complicated, but we use WHERE to do less work. - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + Block block_to_filter; - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + MutableColumnPtr database_column_mut = ColumnString::create(); + MutableColumnPtr table_column_mut = ColumnString::create(); + MutableColumnPtr engine_column_mut = ColumnString::create(); + MutableColumnPtr active_column_mut = ColumnUInt8::create(); - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + const auto access = context->getAccess(); + const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, - - {"default_compression_codec", std::make_shared()}, - - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, - - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, - - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, - - {"projections", std::make_shared(std::make_shared())}, - - {"visible", std::make_shared()}, - {"creation_tid", getTransactionIDDataType()}, - {"removal_tid_lock", std::make_shared()}, - {"removal_tid", getTransactionIDDataType()}, - {"creation_csn", std::make_shared()}, - {"removal_csn", std::make_shared()}, - - 
{"has_lightweight_delete", std::make_shared()}, - - {"last_removal_attempt_time", std::make_shared()}, - {"removal_state", std::make_shared()}, - }; - return names_and_types; -} - - -void StorageSystemDroppedTablesParts::fillData(MutableColumns & columns, ContextPtr context, const SelectQueryInfo &) const -{ auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); - - for (const auto & storage : tables_mark_dropped) + for (const auto & dropped_table : tables_mark_dropped) { - const auto * data = dynamic_cast(storage.table.get()); - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", storage.table->getName()); + StoragePtr storage = dropped_table.table; + if (!storage) + continue; - using State = MergeTreeData::DataPartState; - - MergeTreeData::DataPartStateVector all_parts_state; - auto all_parts = data->getDataPartsVectorForInternalUsage({State::Active, State::Outdated}, &all_parts_state); - - for (size_t part_number = 0; part_number < all_parts.size(); ++part_number) + String database_name = storage->getStorageID().getDatabaseName(); + String table_name = storage->getStorageID().getTableName(); + String engine_name = storage->getName(); +#if USE_MYSQL + if (auto * proxy = dynamic_cast(storage.get())) { - const auto & part = all_parts[part_number]; - auto part_state = all_parts_state[part_number]; - - ColumnSize columns_size = part->getTotalColumnsSize(); - ColumnSize secondary_indexes_size = part->getTotalSeconaryIndicesSize(); - - size_t res_index = 0; - - { - WriteBufferFromOwnString out; - part->partition.serializeText(*data, out, FormatSettings{}); - columns[res_index++]->insert(out.str()); - } - columns[res_index++]->insert(part->name); - columns[res_index++]->insert(part->uuid); - columns[res_index++]->insert(part->getTypeName()); - columns[res_index++]->insert(part_state == State::Active); - - - columns[res_index++]->insert(part->getMarksCount()); - columns[res_index++]->insert(part->rows_count); - columns[res_index++]->insert(part->getBytesOnDisk()); - columns[res_index++]->insert(columns_size.data_compressed); - columns[res_index++]->insert(columns_size.data_uncompressed); - columns[res_index++]->insert(part->getIndexSizeFromFile()); - columns[res_index++]->insert(columns_size.marks); - columns[res_index++]->insert(secondary_indexes_size.data_compressed); - columns[res_index++]->insert(secondary_indexes_size.data_uncompressed); - columns[res_index++]->insert(secondary_indexes_size.marks); - columns[res_index++]->insert(static_cast(part->modification_time)); - { - time_t remove_time = part->remove_time.load(std::memory_order_relaxed); - columns[res_index++]->insert(static_cast(remove_time == std::numeric_limits::max() ? 0 : remove_time)); - } - - /// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts. 
- columns[res_index++]->insert(static_cast(part.use_count() - 1)); - - auto min_max_date = part->getMinMaxDate(); - auto min_max_time = part->getMinMaxTime(); - - columns[res_index++]->insert(min_max_date.first); - columns[res_index++]->insert(min_max_date.second); - columns[res_index++]->insert(static_cast(min_max_time.first)); - columns[res_index++]->insert(static_cast(min_max_time.second)); - columns[res_index++]->insert(part->info.partition_id); - columns[res_index++]->insert(part->info.min_block); - columns[res_index++]->insert(part->info.max_block); - columns[res_index++]->insert(part->info.level); - columns[res_index++]->insert(static_cast(part->info.getDataVersion())); - columns[res_index++]->insert(part->getIndexSizeInBytes()); - columns[res_index++]->insert(part->getIndexSizeInAllocatedBytes()); - columns[res_index++]->insert(part->is_frozen.load(std::memory_order_relaxed)); - - columns[res_index++]->insert(storage.table->getStorageID().getDatabaseName()); - columns[res_index++]->insert(storage.table->getStorageID().getTableName()); - columns[res_index++]->insert(storage.table->getName()); - - { - if (part->isStoredOnDisk()) - columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); - else - columns[res_index++]->insertDefault(); - } - - /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. - if (part->isStoredOnDisk() - && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) - { - columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); - } - else - columns[res_index++]->insertDefault(); - - - { - MinimalisticDataPartChecksums helper; - helper.computeTotalChecksums(part->checksums); - - columns[res_index++]->insert(getHexUIntLowercase(helper.hash_of_all_files)); - columns[res_index++]->insert(getHexUIntLowercase(helper.hash_of_uncompressed_files)); - columns[res_index++]->insert(getHexUIntLowercase(helper.uncompressed_hash_of_compressed_files)); - } - - /// delete_ttl_info - columns[res_index++]->insert(static_cast(part->ttl_infos.table_ttl.min)); - columns[res_index++]->insert(static_cast(part->ttl_infos.table_ttl.max)); - - auto add_ttl_info_map = [&](const TTLInfoMap & ttl_info_map) - { - Array expression_array; - Array min_array; - Array max_array; - - expression_array.reserve(ttl_info_map.size()); - min_array.reserve(ttl_info_map.size()); - max_array.reserve(ttl_info_map.size()); - for (const auto & [expression, ttl_info] : ttl_info_map) - { - expression_array.emplace_back(expression); - min_array.push_back(static_cast(ttl_info.min)); - max_array.push_back(static_cast(ttl_info.max)); - } - columns[res_index++]->insert(expression_array); - columns[res_index++]->insert(min_array); - columns[res_index++]->insert(max_array); - }; - - add_ttl_info_map(part->ttl_infos.moves_ttl); - - columns[res_index++]->insert(queryToString(part->default_codec->getCodecDesc())); - - add_ttl_info_map(part->ttl_infos.recompression_ttl); - add_ttl_info_map(part->ttl_infos.group_by_ttl); - add_ttl_info_map(part->ttl_infos.rows_where_ttl); - - Array projections; - for (const auto & [name, _] : part->getProjectionParts()) - projections.push_back(name); - - columns[res_index++]->insert(projections); - - { - auto txn = context->getCurrentTransaction(); - if (txn) - columns[res_index++]->insert(part->version.isVisible(*txn)); - else - columns[res_index++]->insert(part_state == State::Active); - } - - auto get_tid_as_field = [](const TransactionID & tid) -> Field - { - 
return Tuple{tid.start_csn, tid.local_tid, tid.host_id}; - }; - - columns[res_index++]->insert(get_tid_as_field(part->version.creation_tid)); - columns[res_index++]->insert(part->version.removal_tid_lock.load(std::memory_order_relaxed)); - columns[res_index++]->insert(get_tid_as_field(part->version.getRemovalTID())); - columns[res_index++]->insert(part->version.creation_csn.load(std::memory_order_relaxed)); - columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); - columns[res_index++]->insert(part->hasLightweightDelete()); - columns[res_index++]->insert(static_cast(part->last_removal_attempt_time.load(std::memory_order_relaxed))); - columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); - + auto nested = proxy->getNested(); + storage.swap(nested); } +#endif + if (!dynamic_cast(storage.get())) + continue; + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + storages[std::make_pair(database_name, table_name)] = storage; + + /// Add all combinations of flag 'active'. + for (UInt64 active : {0, 1}) + { + database_column_mut->insert(database_name); + table_column_mut->insert(table_name); + engine_column_mut->insert(engine_name); + active_column_mut->insert(active); + } } + block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); + + if (block_to_filter.rows()) + { + /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + rows = block_to_filter.rows(); + } + + database_column = block_to_filter.getByName("database").column; + table_column = block_to_filter.getByName("table").column; + active_column = block_to_filter.getByName("active").column; } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h index 652d552007a..1a8a27e0b7c 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -1,25 +1,33 @@ #pragma once -#include +#include namespace DB { +class StoragesDroppedInfoStream : public StoragesInfoStreamBase +{ +public: + StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context); +}; + class Context; /** Implements system table 'dropped_tables_parts' which allows to get information about data parts for dropped but not yet removed tables. 
*/ -class StorageSystemDroppedTablesParts final : public IStorageSystemOneBlock +class StorageSystemDroppedTablesParts final : public StorageSystemParts { public: - std::string getName() const override { return "SystemDroppedTablesParts"; } - static NamesAndTypesList getNamesAndTypes(); + explicit StorageSystemDroppedTablesParts(const StorageID & table_id) : StorageSystemParts(table_id) {} + std::string getName() const override { return "SystemDroppedTablesParts"; } protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & columns, ContextPtr context, const SelectQueryInfo &) const override; + std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) override + { + return std::make_unique(query_info, context); + } }; } diff --git a/src/Storages/System/StorageSystemParts.h b/src/Storages/System/StorageSystemParts.h index c7a46cfda54..e0082e40e7d 100644 --- a/src/Storages/System/StorageSystemParts.h +++ b/src/Storages/System/StorageSystemParts.h @@ -11,7 +11,7 @@ class Context; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. */ -class StorageSystemParts final : public StorageSystemPartsBase +class StorageSystemParts : public StorageSystemPartsBase { public: explicit StorageSystemParts(const StorageID & table_id_); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 513af6cfc46..bf476c3da71 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -83,7 +83,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()) + : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. /// This is kind of complicated, but we use WHERE to do less work. @@ -189,57 +189,8 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte database_column = block_to_filter.getByName("database").column; table_column = block_to_filter.getByName("table").column; active_column = block_to_filter.getByName("active").column; - - next_row = 0; } -StoragesInfo StoragesInfoStream::next() -{ - while (next_row < rows) - { - StoragesInfo info; - - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - - auto is_same_table = [&info, this] (size_t row) -> bool - { - return (*database_column)[row].get() == info.database && - (*table_column)[row].get() == info.table; - }; - - /// We may have two rows per table which differ in 'active' value. - /// If rows with 'active = 0' were not filtered out, this means we - /// must collect the inactive parts. Remember this fact in StoragesInfo. - for (; next_row < rows && is_same_table(next_row); ++next_row) - { - const auto active = (*active_column)[next_row].get(); - if (active == 0) - info.need_inactive_parts = true; - } - - info.storage = storages.at(std::make_pair(info.database, info.table)); - - /// For table not to be dropped and set of columns to remain constant. 
- info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); - - if (info.table_lock == nullptr) - { - // Table was dropped while acquiring the lock, skipping table - continue; - } - - info.engine = info.storage->getName(); - - info.data = dynamic_cast(info.storage.get()); - if (!info.data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); - - return info; - } - - return {}; -} Pipe StorageSystemPartsBase::read( const Names & column_names, @@ -252,7 +203,7 @@ Pipe StorageSystemPartsBase::read( { bool has_state_column = hasStateColumn(column_names, storage_snapshot); - StoragesInfoStream stream(query_info, context); + auto stream = getStoragesInfoStream(query_info, context); /// Create the result. Block sample = storage_snapshot->metadata->getSampleBlock(); @@ -263,7 +214,7 @@ Pipe StorageSystemPartsBase::read( if (has_state_column) res_columns.push_back(ColumnString::create()); - while (StoragesInfo info = stream.next()) + while (StoragesInfo info = stream->next()) { processNextStorage(context, res_columns, columns_mask, info, has_state_column); } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index c3d2e64b303..6ece6ae25b8 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -29,13 +29,64 @@ struct StoragesInfo }; /** A helper class that enumerates the storages that match given query. */ -class StoragesInfoStream +class StoragesInfoStreamBase { public: - StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); - StoragesInfo next(); + StoragesInfoStreamBase(ContextPtr context) + : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()), next_row(0), rows(0) + {} -private: + StoragesInfo next() + { + while (next_row < rows) + { + StoragesInfo info; + + info.database = (*database_column)[next_row].get(); + info.table = (*table_column)[next_row].get(); + + auto is_same_table = [&info, this] (size_t row) -> bool + { + return (*database_column)[row].get() == info.database && + (*table_column)[row].get() == info.table; + }; + + /// We may have two rows per table which differ in 'active' value. + /// If rows with 'active = 0' were not filtered out, this means we + /// must collect the inactive parts. Remember this fact in StoragesInfo. + for (; next_row < rows && is_same_table(next_row); ++next_row) + { + const auto active = (*active_column)[next_row].get(); + if (active == 0) + info.need_inactive_parts = true; + } + + info.storage = storages.at(std::make_pair(info.database, info.table)); + + if (needsLock) + { + /// For table not to be dropped and set of columns to remain constant. 
+ info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); + if (info.table_lock == nullptr) + { + // Table was dropped while acquiring the lock, skipping table + continue; + } + } + + info.engine = info.storage->getName(); + + info.data = dynamic_cast(info.storage.get()); + if (!info.data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); + + return info; + } + + return {}; + } + +protected: String query_id; Settings settings; @@ -49,6 +100,15 @@ private: using StoragesMap = std::map, StoragePtr>; StoragesMap storages; + + bool needsLock = true; +}; + + +class StoragesInfoStream : public StoragesInfoStreamBase +{ +public: + StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. @@ -77,6 +137,11 @@ protected: StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); + virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) + { + return std::make_unique(query_info, context); + } + virtual void processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; }; From 398499d25393026489721b747247122e4044a9e7 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 5 Dec 2023 18:29:18 +0000 Subject: [PATCH 043/137] Support SHARDS for HashedArrayDictionary --- docs/en/sql-reference/dictionaries/index.md | 8 +- src/Dictionaries/HashedArrayDictionary.cpp | 343 +++++++++++------- src/Dictionaries/HashedArrayDictionary.h | 112 ++++-- src/Dictionaries/HashedDictionary.h | 9 +- .../HashedDictionaryParallelLoader.h | 7 +- ...shed_array_dictionary_simple_key.reference | 66 ++++ ...hashed_array_dictionary_simple_key.sql.j2} | 19 +- ...hed_array_dictionary_complex_key.reference | 56 +++ ...ashed_array_dictionary_complex_key.sql.j2} | 12 +- ...ictionary_hierarchical_functions.reference | 35 ++ ..._dictionary_hierarchical_functions.sql.j2} | 6 +- ...dictionaries_nullable_parent_key.reference | 36 ++ ...l_dictionaries_nullable_parent_key.sql.j2} | 6 +- .../02760_dictionaries_memory.sql.j2 | 1 + 14 files changed, 530 insertions(+), 186 deletions(-) rename tests/queries/0_stateless/{02098_hashed_array_dictionary_simple_key.sql => 02098_hashed_array_dictionary_simple_key.sql.j2} (95%) rename tests/queries/0_stateless/{02099_hashed_array_dictionary_complex_key.sql => 02099_hashed_array_dictionary_complex_key.sql.j2} (96%) rename tests/queries/0_stateless/{02311_hashed_array_dictionary_hierarchical_functions.sql => 02311_hashed_array_dictionary_hierarchical_functions.sql.j2} (91%) rename tests/queries/0_stateless/{02316_hierarchical_dictionaries_nullable_parent_key.sql => 02316_hierarchical_dictionaries_nullable_parent_key.sql.j2} (97%) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 4f021b25809..9f86aaf2502 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -394,7 +394,7 @@ Configuration example: or ``` sql -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY([SHARDS 1])) ``` ### complex_key_hashed_array @@ -412,7 +412,7 @@ Configuration example: or ``` sql -LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1])) ``` ### range_hashed {#range_hashed} @@ -2415,8 +2415,8 @@ clickhouse client \ --secure \ --password MY_PASSWORD \ --query 
" - INSERT INTO regexp_dictionary_source_table - SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') + INSERT INTO regexp_dictionary_source_table + SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') FORMAT CSV" < regexp_dict.csv ``` diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 21016025d96..4c9ff8abe80 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -20,17 +20,19 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int DICTIONARY_IS_EMPTY; + extern const int LOGICAL_ERROR; extern const int UNSUPPORTED_METHOD; } -template -HashedArrayDictionary::HashedArrayDictionary( +template +HashedArrayDictionary::HashedArrayDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const HashedArrayDictionaryStorageConfiguration & configuration_, BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) + , log(&Poco::Logger::get("HashedArrayDictionary")) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , configuration(configuration_) @@ -42,8 +44,8 @@ HashedArrayDictionary::HashedArrayDictionary( calculateBytesAllocated(); } -template -ColumnPtr HashedArrayDictionary::getColumn( +template +ColumnPtr HashedArrayDictionary::getColumn( const std::string & attribute_name, const DataTypePtr & result_type, const Columns & key_columns, @@ -67,8 +69,8 @@ ColumnPtr HashedArrayDictionary::getColumn( return getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor); } -template -Columns HashedArrayDictionary::getColumns( +template +Columns HashedArrayDictionary::getColumns( const Strings & attribute_names, const DataTypes & result_types, const Columns & key_columns, @@ -83,7 +85,7 @@ Columns HashedArrayDictionary::getColumns( const size_t keys_size = extractor.getKeysSize(); - PaddedPODArray key_index_to_element_index; + KeyIndexToElementIndex key_index_to_element_index; /** Optimization for multiple attributes. * For each key save element index in key_index_to_element_index array. 
@@ -92,7 +94,6 @@ Columns HashedArrayDictionary::getColumns( */ if (attribute_names.size() > 1) { - const auto & key_attribute_container = key_attribute.container; size_t keys_found = 0; key_index_to_element_index.resize(keys_size); @@ -100,15 +101,23 @@ Columns HashedArrayDictionary::getColumns( for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = extractor.extractCurrentKey(); + auto shard = getShard(key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it = key_attribute_container.find(key); if (it == key_attribute_container.end()) { - key_index_to_element_index[key_index] = -1; + if constexpr (sharded) + key_index_to_element_index[key_index] = std::make_pair(-1, shard); + else + key_index_to_element_index[key_index] = -1; } else { - key_index_to_element_index[key_index] = it->getMapped(); + if constexpr (sharded) + key_index_to_element_index[key_index] = std::make_pair(it->getMapped(), shard); + else + key_index_to_element_index[key_index] = it->getMapped(); ++keys_found; } @@ -147,8 +156,8 @@ Columns HashedArrayDictionary::getColumns( return result_columns; } -template -ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const +template +ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const { if (dictionary_key_type == DictionaryKeyType::Complex) dict_struct.validateKeyTypes(key_types); @@ -166,8 +175,10 @@ ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Colum for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) { auto requested_key = extractor.extractCurrentKey(); + auto shard = getShard(requested_key); + const auto & key_attribute_container = key_attribute.containers[shard]; - out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end(); + out[requested_key_index] = key_attribute_container.find(requested_key) != key_attribute_container.end(); keys_found += out[requested_key_index]; extractor.rollbackCurrentKey(); @@ -179,8 +190,8 @@ ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Colum return result; } -template -ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const +template +ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -197,16 +208,20 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const auto & key_attribute_container = key_attribute.container; - const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + auto is_key_valid_func = [&, this](auto & key) + { + const auto & key_attribute_container = key_attribute.containers[getShard(key)]; + return key_attribute_container.find(key) != key_attribute_container.end(); + }; size_t keys_found = 0; - auto get_parent_func = [&](auto & hierarchy_key) + auto get_parent_func = [&, this](auto & hierarchy_key) { std::optional result; + auto shard = getShard(hierarchy_key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it = key_attribute_container.find(hierarchy_key); @@ -215,8 +230,9 @@ 
ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key size_t key_index = it->getMapped(); - if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index]) return result; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; UInt64 parent_key = parent_keys_container[key_index]; if (null_value && *null_value == parent_key) @@ -241,8 +257,8 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key } } -template -ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( +template +ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( ColumnPtr key_column [[maybe_unused]], ColumnPtr in_key_column [[maybe_unused]], const DataTypePtr &) const @@ -265,16 +281,20 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const auto & key_attribute_container = key_attribute.container; - const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + auto is_key_valid_func = [&](auto & key) + { + const auto & key_attribute_container = key_attribute.containers[getShard(key)]; + return key_attribute_container.find(key) != key_attribute_container.end(); + }; size_t keys_found = 0; auto get_parent_func = [&](auto & hierarchy_key) { std::optional result; + auto shard = getShard(hierarchy_key); + const auto & key_attribute_container = key_attribute.containers[shard]; auto it = key_attribute_container.find(hierarchy_key); @@ -283,9 +303,10 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( size_t key_index = it->getMapped(); - if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index]) return result; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; UInt64 parent_key = parent_keys_container[key_index]; if (null_value && *null_value == parent_key) return result; @@ -309,8 +330,8 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( } } -template -DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary::getHierarchicalIndex() const +template +DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary::getHierarchicalIndex() const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -318,33 +339,35 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary & parent_keys_container = std::get>(hierarchical_attribute.container); - - const auto & key_attribute_container = key_attribute.container; - - HashMap index_to_key; - index_to_key.reserve(key_attribute.container.size()); - - for (auto & [key, value] : key_attribute_container) - index_to_key[value] = key; DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; - parent_to_child.reserve(index_to_key.size()); - - size_t parent_keys_container_size = parent_keys_container.size(); - for (size_t i = 0; i < parent_keys_container_size; ++i) + for (size_t shard = 0; shard < configuration.shards; ++shard) { - if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[i]) - continue; + HashMap 
index_to_key; + index_to_key.reserve(element_counts[shard]); - const auto * it = index_to_key.find(i); - if (it == index_to_key.end()) - continue; + for (auto & [key, value] : key_attribute.containers[shard]) + index_to_key[value] = key; - auto child_key = it->getMapped(); - auto parent_key = parent_keys_container[i]; - parent_to_child[parent_key].emplace_back(child_key); + parent_to_child.reserve(parent_to_child.size() + index_to_key.size()); + + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + const auto & parent_keys_container = std::get>(hierarchical_attribute.containers)[shard]; + + size_t parent_keys_container_size = parent_keys_container.size(); + for (size_t i = 0; i < parent_keys_container_size; ++i) + { + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][i]) + continue; + + const auto * it = index_to_key.find(i); + if (it == index_to_key.end()) + continue; + + auto child_key = it->getMapped(); + auto parent_key = parent_keys_container[i]; + parent_to_child[parent_key].emplace_back(child_key); + } } return std::make_shared(parent_to_child); @@ -355,8 +378,8 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary -ColumnPtr HashedArrayDictionary::getDescendants( +template +ColumnPtr HashedArrayDictionary::getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr &, size_t level [[maybe_unused]], @@ -381,8 +404,8 @@ ColumnPtr HashedArrayDictionary::getDescendants( } } -template -void HashedArrayDictionary::createAttributes() +template +void HashedArrayDictionary::createAttributes() { const auto size = dict_struct.attributes.size(); attributes.reserve(size); @@ -395,17 +418,24 @@ void HashedArrayDictionary::createAttributes() using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional>() : std::optional>{}; - Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType(), std::move(is_index_null)}; + auto is_index_null = dictionary_attribute.is_nullable ? 
std::make_optional>(configuration.shards) : std::nullopt; + Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerShardsType(configuration.shards), std::move(is_index_null)}; attributes.emplace_back(std::move(attribute)); }; callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); } + + key_attribute.containers.resize(configuration.shards); + element_counts.resize(configuration.shards); + + string_arenas.resize(configuration.shards); + for (auto & arena : string_arenas) + arena = std::make_unique(); } -template -void HashedArrayDictionary::updateData() +template +void HashedArrayDictionary::updateData() { if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { @@ -445,13 +475,17 @@ void HashedArrayDictionary::updateData() if (update_field_loaded_block) { resize(update_field_loaded_block->rows()); - blockToAttributes(*update_field_loaded_block.get()); + DictionaryKeysArenaHolder arena_holder; + blockToAttributes(*update_field_loaded_block.get(), arena_holder, /* shard = */ 0); } } -template -void HashedArrayDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +template +void HashedArrayDictionary::blockToAttributes(const Block & block, DictionaryKeysArenaHolder & arena_holder, size_t shard) { + if (unlikely(shard >= configuration.shards)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Shard number {} is out of range: 0..{}", shard, configuration.shards - 1); + size_t skip_keys_size_offset = dict_struct.getKeysSize(); Columns key_columns; @@ -461,7 +495,6 @@ void HashedArrayDictionary::blockToAttributes(const Block & for (size_t i = 0; i < skip_keys_size_offset; ++i) key_columns.emplace_back(block.safeGetByPosition(i).column); - DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); const size_t keys_size = keys_extractor.getKeysSize(); @@ -471,18 +504,18 @@ void HashedArrayDictionary::blockToAttributes(const Block & { auto key = keys_extractor.extractCurrentKey(); - auto it = key_attribute.container.find(key); + auto it = key_attribute.containers[shard].find(key); - if (it != key_attribute.container.end()) + if (it != key_attribute.containers[shard].end()) { keys_extractor.rollbackCurrentKey(); continue; } if constexpr (std::is_same_v) - key = copyStringInArena(string_arena, key); + key = copyStringInArena(*string_arenas[shard], key); - key_attribute.container.insert({key, element_count}); + key_attribute.containers[shard].insert({key, element_counts[shard]}); for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) { @@ -498,16 +531,16 @@ void HashedArrayDictionary::blockToAttributes(const Block & using AttributeType = typename Type::AttributeType; using AttributeValueType = DictionaryValueType; - auto & attribute_container = std::get>(attribute.container); + auto & attribute_container = std::get>(attribute.containers)[shard]; attribute_container.emplace_back(); if (attribute_is_nullable) { - attribute.is_index_null->emplace_back(); + (*attribute.is_index_null)[shard].emplace_back(); if (column_value_to_insert.isNull()) { - (*attribute.is_index_null).back() = true; + (*attribute.is_index_null)[shard].back() = true; return; } } @@ -515,7 +548,7 @@ void HashedArrayDictionary::blockToAttributes(const Block & if constexpr (std::is_same_v) { String & value_to_insert = column_value_to_insert.get(); - StringRef string_in_arena_reference = copyStringInArena(string_arena, value_to_insert); + StringRef 
string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert); attribute_container.back() = string_in_arena_reference; } else @@ -528,23 +561,29 @@ void HashedArrayDictionary::blockToAttributes(const Block & callOnDictionaryAttributeType(attribute.type, type_call); } - ++element_count; + ++element_counts[shard]; + ++total_element_count; keys_extractor.rollbackCurrentKey(); } } -template -void HashedArrayDictionary::resize(size_t total_rows) +template +void HashedArrayDictionary::resize(size_t total_rows) { if (unlikely(!total_rows)) return; - key_attribute.container.reserve(total_rows); + /// In multi shards configuration it is pointless. + if constexpr (sharded) + return; + + for (auto & container : key_attribute.containers) + container.reserve(total_rows); } -template +template template -ColumnPtr HashedArrayDictionary::getAttributeColumn( +ColumnPtr HashedArrayDictionary::getAttributeColumn( const Attribute & attribute, const DictionaryAttribute & dictionary_attribute, size_t keys_size, @@ -638,16 +677,14 @@ ColumnPtr HashedArrayDictionary::getAttributeColumn( return result; } -template +template template -void HashedArrayDictionary::getItemsImpl( +void HashedArrayDictionary::getItemsImpl( const Attribute & attribute, DictionaryKeysExtractor & keys_extractor, ValueSetter && set_value [[maybe_unused]], DefaultValueExtractor & default_value_extractor) const { - const auto & key_attribute_container = key_attribute.container; - const auto & attribute_container = std::get>(attribute.container); const size_t keys_size = keys_extractor.getKeysSize(); size_t keys_found = 0; @@ -655,6 +692,9 @@ void HashedArrayDictionary::getItemsImpl( for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = keys_extractor.extractCurrentKey(); + auto shard = getShard(key); + const auto & key_attribute_container = key_attribute.containers[shard]; + const auto & attribute_container = std::get>(attribute.containers)[shard]; const auto it = key_attribute_container.find(key); @@ -665,7 +705,7 @@ void HashedArrayDictionary::getItemsImpl( const auto & element = attribute_container[element_index]; if constexpr (is_nullable) - set_value(key_index, element, (*attribute.is_index_null)[element_index]); + set_value(key_index, element, (*attribute.is_index_null)[shard][element_index]); else set_value(key_index, element, false); @@ -686,28 +726,39 @@ void HashedArrayDictionary::getItemsImpl( found_count.fetch_add(keys_found, std::memory_order_relaxed); } -template +template template -void HashedArrayDictionary::getItemsImpl( +void HashedArrayDictionary::getItemsImpl( const Attribute & attribute, - const PaddedPODArray & key_index_to_element_index, + const KeyIndexToElementIndex & key_index_to_element_index, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const { - const auto & attribute_container = std::get>(attribute.container); const size_t keys_size = key_index_to_element_index.size(); + size_t shard = 0; for (size_t key_index = 0; key_index < keys_size; ++key_index) { - bool key_exists = key_index_to_element_index[key_index] != -1; - - if (key_exists) + ssize_t element_index; + if constexpr (sharded) { - size_t element_index = static_cast(key_index_to_element_index[key_index]); - const auto & element = attribute_container[element_index]; + element_index = key_index_to_element_index[key_index].first; + shard = key_index_to_element_index[key_index].second; + } + else + { + element_index = key_index_to_element_index[key_index]; + } + + if (element_index != 
-1) + { + const auto & attribute_container = std::get>(attribute.containers)[shard]; + + size_t found_element_index = static_cast(element_index); + const auto & element = attribute_container[found_element_index]; if constexpr (is_nullable) - set_value(key_index, element, (*attribute.is_index_null)[element_index]); + set_value(key_index, element, (*attribute.is_index_null)[shard][found_element_index]); else set_value(key_index, element, false); } @@ -721,13 +772,17 @@ void HashedArrayDictionary::getItemsImpl( } } -template -void HashedArrayDictionary::loadData() +template +void HashedArrayDictionary::loadData() { if (!source_ptr->hasUpdateField()) { - QueryPipeline pipeline; - pipeline = QueryPipeline(source_ptr->loadAll()); + + std::optional parallel_loader; + if constexpr (sharded) + parallel_loader.emplace(*this); + + QueryPipeline pipeline(source_ptr->loadAll()); DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); UInt64 pull_time_microseconds = 0; @@ -751,10 +806,22 @@ void HashedArrayDictionary::loadData() Stopwatch watch_process; resize(total_rows); - blockToAttributes(block); + + if (parallel_loader) + { + parallel_loader->addBlock(block); + } + else + { + DictionaryKeysArenaHolder arena_holder; + blockToAttributes(block, arena_holder, /* shard = */ 0); + } process_time_microseconds += watch_process.elapsedMicroseconds(); } + if (parallel_loader) + parallel_loader->finish(); + LOG_DEBUG(&Poco::Logger::get("HashedArrayDictionary"), "Finished {}reading {} blocks with {} rows from pipeline in {:.2f} sec and inserted into hashtable in {:.2f} sec", configuration.use_async_executor ? "asynchronous " : "", @@ -765,14 +832,14 @@ void HashedArrayDictionary::loadData() updateData(); } - if (configuration.require_nonempty && 0 == element_count) + if (configuration.require_nonempty && 0 == total_element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } -template -void HashedArrayDictionary::buildHierarchyParentToChildIndexIfNeeded() +template +void HashedArrayDictionary::buildHierarchyParentToChildIndexIfNeeded() { if (!dict_struct.hierarchical_attribute_index) return; @@ -781,12 +848,13 @@ void HashedArrayDictionary::buildHierarchyParentToChildInde hierarchical_index = getHierarchicalIndex(); } -template -void HashedArrayDictionary::calculateBytesAllocated() +template +void HashedArrayDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); - bytes_allocated += key_attribute.container.size(); + for (const auto & container : key_attribute.containers) + bytes_allocated += container.size(); for (auto & attribute : attributes) { @@ -796,26 +864,29 @@ void HashedArrayDictionary::calculateBytesAllocated() using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - const auto & container = std::get>(attribute.container); - bytes_allocated += sizeof(AttributeContainerType); - - if constexpr (std::is_same_v) + for (const auto & container : std::get>(attribute.containers)) { - /// It is not accurate calculations - bytes_allocated += sizeof(Array) * container.size(); - } - else - { - bytes_allocated += container.allocated_bytes(); - } + bytes_allocated += sizeof(AttributeContainerType); - bucket_count = container.capacity(); + if constexpr (std::is_same_v) + { + /// It is not accurate calculations + bytes_allocated += sizeof(Array) * container.size(); + } + else + { + bytes_allocated += 
container.allocated_bytes(); + } + + bucket_count = container.capacity(); + } }; callOnDictionaryAttributeType(attribute.type, type_call); if (attribute.is_index_null.has_value()) - bytes_allocated += (*attribute.is_index_null).size(); + for (const auto & container : attribute.is_index_null.value()) + bytes_allocated += container.size(); } if (update_field_loaded_block) @@ -826,18 +897,19 @@ void HashedArrayDictionary::calculateBytesAllocated() hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes(); bytes_allocated += hierarchical_index_bytes_allocated; } - - bytes_allocated += string_arena.allocatedBytes(); + for (const auto & string_arena : string_arenas) + bytes_allocated += string_arena->allocatedBytes(); } -template -Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const +template +Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const { PaddedPODArray keys; - keys.reserve(key_attribute.container.size()); + keys.reserve(total_element_count); - for (auto & [key, _] : key_attribute.container) - keys.emplace_back(key); + for (const auto & container : key_attribute.containers) + for (auto & [key, _] : container) + keys.emplace_back(key); ColumnsWithTypeAndName key_columns; @@ -858,8 +930,10 @@ Pipe HashedArrayDictionary::read(const Names & column_names return result; } -template class HashedArrayDictionary; -template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; +template class HashedArrayDictionary; void registerDictionaryArrayHashed(DictionaryFactory & factory) { @@ -886,7 +960,14 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime}; + std::string dictionary_layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "hashed_array" : "complex_key_hashed_array"; + std::string dictionary_layout_prefix = ".layout." 
+ dictionary_layout_name; + + Int64 shards = config.getInt(config_prefix + dictionary_layout_prefix + ".shards", 1); + if (shards <= 0 || 128 < shards) + throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name); + + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards)}; ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); const auto & settings = context->getSettingsRef(); @@ -895,9 +976,17 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; if (dictionary_key_type == DictionaryKeyType::Simple) - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + { + if (shards > 1) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + } else - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + { + if (shards > 1) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + } }; factory.registerLayout("hashed_array", diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 3b9446e4e8f..606008ce921 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -13,6 +13,7 @@ #include #include #include +#include /** This dictionary stores all attributes in arrays. * Key is stored in hash table and value is index into attribute array. 
@@ -25,12 +26,17 @@ struct HashedArrayDictionaryStorageConfiguration { const bool require_nonempty; const DictionaryLifetime lifetime; + size_t shards = 1; + size_t shard_load_queue_backlog = 10000; bool use_async_executor = false; }; -template +template class HashedArrayDictionary final : public IDictionary { + using DictionaryParallelLoaderType = HashedDictionaryImpl::HashedDictionaryParallelLoader>; + friend class HashedDictionaryImpl::HashedDictionaryParallelLoader>; + public: using KeyType = std::conditional_t; @@ -63,13 +69,13 @@ public: double getHitRate() const override { return 1.0; } - size_t getElementCount() const override { return element_count; } + size_t getElementCount() const override { return total_element_count; } - double getLoadFactor() const override { return static_cast(element_count) / bucket_count; } + double getLoadFactor() const override { return static_cast(total_element_count) / bucket_count; } std::shared_ptr clone() const override { - return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); + return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } DictionarySourcePtr getSource() const override { return source_ptr; } @@ -132,50 +138,54 @@ private: template using AttributeContainerType = std::conditional_t, std::vector, PaddedPODArray>; + template + using AttributeContainerShardsType = std::vector>; + struct Attribute final { AttributeUnderlyingType type; std::variant< - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType, - AttributeContainerType> - container; + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType, + AttributeContainerShardsType> + containers; - std::optional> is_index_null; + /// One container per shard + using RowsMask = std::vector; + std::optional> is_index_null; }; struct KeyAttribute final { - - KeyContainerType container; - + /// One container per shard + std::vector containers; }; void createAttributes(); - void blockToAttributes(const Block & block); + void blockToAttributes(const Block & block, DictionaryKeysArenaHolder & arena_holder, size_t shard); void updateData(); @@ -185,6 +195,22 @@ private: void calculateBytesAllocated(); + UInt64 getShard(UInt64 key) const + { + if constexpr (!sharded) + return 0; 
+ /// NOTE: function here should not match with the DefaultHash<> since + /// it used for the HashMap/sparse_hash_map. + return intHashCRC32(key) % configuration.shards; + } + + UInt64 getShard(StringRef key) const + { + if constexpr (!sharded) + return 0; + return StringRefHash()(key) % configuration.shards; + } + template ColumnPtr getAttributeColumn( const Attribute & attribute, @@ -200,10 +226,13 @@ private: ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; + + using KeyIndexToElementIndex = std::conditional_t>, PaddedPODArray>; + template void getItemsImpl( const Attribute & attribute, - const PaddedPODArray & key_index_to_element_index, + const KeyIndexToElementIndex & key_index_to_element_index, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; @@ -215,6 +244,8 @@ private: void resize(size_t total_rows); + Poco::Logger * log; + const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const HashedArrayDictionaryStorageConfiguration configuration; @@ -225,17 +256,20 @@ private: size_t bytes_allocated = 0; size_t hierarchical_index_bytes_allocated = 0; - size_t element_count = 0; + std::atomic total_element_count = 0; + std::vector element_counts; size_t bucket_count = 0; mutable std::atomic query_count{0}; mutable std::atomic found_count{0}; BlockPtr update_field_loaded_block; - Arena string_arena; + std::vector> string_arenas; DictionaryHierarchicalParentToChildIndexPtr hierarchical_index; }; -extern template class HashedArrayDictionary; -extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 376637189dd..8009ffab80a 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -71,7 +71,8 @@ struct HashedDictionaryConfiguration template class HashedDictionary final : public IDictionary { - friend class HashedDictionaryParallelLoader; + using DictionaryParallelLoaderType = HashedDictionaryParallelLoader>; + friend class HashedDictionaryParallelLoader>; public: using KeyType = std::conditional_t; @@ -987,7 +988,7 @@ void HashedDictionary::getItemsImpl( auto key = keys_extractor.extractCurrentKey(); auto shard = getShard(key); - const auto & container = attribute_containers[getShard(key)]; + const auto & container = attribute_containers[shard]; const auto it = container.find(key); if (it != container.end()) @@ -1020,11 +1021,11 @@ void HashedDictionary::loadData() { if (!source_ptr->hasUpdateField()) { - std::optional> parallel_loader; + std::optional parallel_loader; if constexpr (sharded) parallel_loader.emplace(*this); - QueryPipeline pipeline = QueryPipeline(source_ptr->loadAll()); + QueryPipeline pipeline(source_ptr->loadAll()); DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index b52158c7fcb..907a987555e 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -38,13 +38,12 @@ namespace DB::HashedDictionaryImpl { /// Implementation parallel dictionary load for SHARDS -template +template class HashedDictionaryParallelLoader : public boost::noncopyable { - using HashedDictionary = 
HashedDictionary; public: - explicit HashedDictionaryParallelLoader(HashedDictionary & dictionary_) + explicit HashedDictionaryParallelLoader(DictionaryType & dictionary_) : dictionary(dictionary_) , shards(dictionary.configuration.shards) , pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards) @@ -118,7 +117,7 @@ public: } private: - HashedDictionary & dictionary; + DictionaryType & dictionary; const size_t shards; ThreadPool pool; std::vector>> shards_queues; diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference index 6e88bbad146..41b9ab687f8 100644 --- a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference @@ -26,6 +26,62 @@ select all values as input stream 0 value_0 value_second_0 1 value_1 value_second_1 2 value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 value_second_1 +2 value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 \N +2 value_2 value_second_2 Dictionary hashed_array_dictionary_simple_key_complex_attributes dictGet existing value value_0 value_second_0 @@ -64,3 +120,13 @@ dictGet dictGetHierarchy [1] [4,2,1] +Dictionary hashed_array_dictionary_simple_key_hierarchy +dictGet +0 +0 +1 +1 +2 +dictGetHierarchy +[1] +[4,2,1] diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql.j2 similarity index 95% rename from tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql rename to tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql.j2 index 7d952223705..e5d8ad36c6d 100644 --- a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql.j2 @@ -11,6 +11,8 @@ INSERT INTO simple_key_simple_attributes_source_table VALUES(0, 'value_0', 'valu INSERT INTO simple_key_simple_attributes_source_table VALUES(1, 'value_1', 'value_second_1'); INSERT INTO simple_key_simple_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS 
hashed_array_dictionary_simple_key_simple_attributes; CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes ( @@ -20,7 +22,7 @@ CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(MIN 1 MAX 1000) SETTINGS(dictionary_use_async_executor=1, max_threads=8); @@ -43,6 +45,7 @@ SELECT 'select all values as input stream'; SELECT * FROM hashed_array_dictionary_simple_key_simple_attributes ORDER BY id; DROP DICTIONARY hashed_array_dictionary_simple_key_simple_attributes; +{% endfor %} DROP TABLE simple_key_simple_attributes_source_table; @@ -59,6 +62,8 @@ INSERT INTO simple_key_complex_attributes_source_table VALUES(0, 'value_0', 'val INSERT INTO simple_key_complex_attributes_source_table VALUES(1, 'value_1', NULL); INSERT INTO simple_key_complex_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_complex_attributes; CREATE DICTIONARY hashed_array_dictionary_simple_key_complex_attributes ( @@ -68,7 +73,7 @@ CREATE DICTIONARY hashed_array_dictionary_simple_key_complex_attributes ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'simple_key_complex_attributes_source_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(MIN 1 MAX 1000); SELECT 'Dictionary hashed_array_dictionary_simple_key_complex_attributes'; @@ -90,6 +95,9 @@ SELECT 'select all values as input stream'; SELECT * FROM hashed_array_dictionary_simple_key_complex_attributes ORDER BY id; DROP DICTIONARY hashed_array_dictionary_simple_key_complex_attributes; + +{% endfor %} + DROP TABLE simple_key_complex_attributes_source_table; DROP TABLE IF EXISTS simple_key_hierarchy_table; @@ -104,6 +112,8 @@ INSERT INTO simple_key_hierarchy_table VALUES (2, 1); INSERT INTO simple_key_hierarchy_table VALUES (3, 1); INSERT INTO simple_key_hierarchy_table VALUES (4, 2); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_hierarchy; CREATE DICTIONARY hashed_array_dictionary_simple_key_hierarchy ( @@ -112,7 +122,7 @@ CREATE DICTIONARY hashed_array_dictionary_simple_key_hierarchy ) PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(MIN 1 MAX 1000); SELECT 'Dictionary hashed_array_dictionary_simple_key_hierarchy'; @@ -122,5 +132,8 @@ SELECT 'dictGetHierarchy'; SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(1)); SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(4)); +{% endfor %} + DROP DICTIONARY hashed_array_dictionary_simple_key_hierarchy; + DROP TABLE simple_key_hierarchy_table; diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference index ec32fa72b4e..13a7548b86f 100644 --- a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference @@ -26,6 +26,62 @@ select all values as input stream 0 id_key_0 value_0 value_second_0 1 id_key_1 value_1 value_second_1 2 id_key_2 value_2 value_second_2 +Dictionary 
hashed_array_dictionary_complex_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 value_second_1 +2 id_key_2 value_2 value_second_2 +Dictionary hashed_array_dictionary_complex_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 \N +2 id_key_2 value_2 value_second_2 Dictionary hashed_array_dictionary_complex_key_complex_attributes dictGet existing value value_0 value_second_0 diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql.j2 similarity index 96% rename from tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql rename to tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql.j2 index 4d2a825c8af..56f9b264a62 100644 --- a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql.j2 @@ -12,6 +12,8 @@ INSERT INTO complex_key_simple_attributes_source_table VALUES(0, 'id_key_0', 'va INSERT INTO complex_key_simple_attributes_source_table VALUES(1, 'id_key_1', 'value_1', 'value_second_1'); INSERT INTO complex_key_simple_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_simple_attributes; CREATE DICTIONARY hashed_array_dictionary_complex_key_simple_attributes ( @@ -23,7 +25,7 @@ CREATE DICTIONARY hashed_array_dictionary_complex_key_simple_attributes PRIMARY KEY id, id_key SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_table')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); +LAYOUT(COMPLEX_KEY_HASHED_ARRAY({{ dictionary_config }})); SELECT 'Dictionary hashed_array_dictionary_complex_key_simple_attributes'; SELECT 'dictGet existing value'; @@ -45,6 +47,8 @@ SELECT * FROM hashed_array_dictionary_complex_key_simple_attributes ORDER BY (id DROP DICTIONARY hashed_array_dictionary_complex_key_simple_attributes; +{% endfor %} + DROP TABLE complex_key_simple_attributes_source_table; DROP TABLE IF EXISTS complex_key_complex_attributes_source_table; @@ -61,6 +65,8 @@ INSERT INTO complex_key_complex_attributes_source_table VALUES(0, 'id_key_0', 'v INSERT INTO complex_key_complex_attributes_source_table VALUES(1, 'id_key_1', 'value_1', NULL); INSERT INTO complex_key_complex_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); +{% for 
dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_complex_attributes; CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes ( @@ -73,7 +79,7 @@ CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes PRIMARY KEY id, id_key SOURCE(CLICKHOUSE(TABLE 'complex_key_complex_attributes_source_table')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); +LAYOUT(COMPLEX_KEY_HASHED_ARRAY({{ dictionary_config }})); SELECT 'Dictionary hashed_array_dictionary_complex_key_complex_attributes'; SELECT 'dictGet existing value'; @@ -93,5 +99,7 @@ SELECT dictHas('hashed_array_dictionary_complex_key_complex_attributes', (number SELECT 'select all values as input stream'; SELECT * FROM hashed_array_dictionary_complex_key_complex_attributes ORDER BY (id, id_key); +{% endfor %} + DROP DICTIONARY hashed_array_dictionary_complex_key_complex_attributes; DROP TABLE complex_key_complex_attributes_source_table; diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference index 7f4ba0901b6..0b0b4175e1f 100644 --- a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference @@ -33,3 +33,38 @@ Get descendants at first level [] [] [] +Get hierarchy +[] +[1] +[2,1] +[3,1] +[4,2,1] +[] +Get is in hierarchy +0 +1 +1 +1 +1 +0 +Get children +[1] +[2,3] +[4] +[] +[] +[] +Get all descendants +[1,2,3,4] +[2,3,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2,3] +[4] +[] +[] +[] diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 similarity index 91% rename from tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql rename to tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 index a775f0e5cbf..bc13bcfdb09 100644 --- a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql.j2 @@ -7,6 +7,8 @@ CREATE TABLE hierarchy_source_table INSERT INTO hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2); +{% for dictionary_config in ['', 'SHARDS 16'] -%} + DROP DICTIONARY IF EXISTS hierarchy_hashed_array_dictionary; CREATE DICTIONARY hierarchy_hashed_array_dictionary ( @@ -15,7 +17,7 @@ CREATE DICTIONARY hierarchy_hashed_array_dictionary ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'hierarchy_source_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(MIN 1 MAX 1000); SELECT 'Get hierarchy'; @@ -29,6 +31,8 @@ SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number) FROM syst SELECT 'Get descendants at first level'; SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number, 1) FROM system.numbers LIMIT 6; +{% endfor %} + DROP DICTIONARY hierarchy_hashed_array_dictionary; DROP TABLE hierarchy_source_table; diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference index 60d9fb16c5f..ab6a247219b 100644 --- 
a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference @@ -106,6 +106,42 @@ Get descendants at first level [] [] [] +HashedArray dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 +Get children +[1] +[2] +[4] +[] +[] +[] +Get all descendants +[1,2,4] +[2,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2] +[4] +[] +[] +[] Cache dictionary Get hierarchy [0] diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 similarity index 97% rename from tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql rename to tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 index d477d58d398..b456495513e 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql.j2 @@ -56,7 +56,7 @@ SELECT 'Get descendants at first level'; SELECT dictGetDescendants('hierachical_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6; DROP DICTIONARY hierachical_hashed_dictionary; - +{% for dictionary_config in ['', 'SHARDS 16'] -%} DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary; CREATE DICTIONARY hierachical_hashed_array_dictionary ( @@ -64,7 +64,7 @@ CREATE DICTIONARY hierachical_hashed_array_dictionary parent_id Nullable(UInt64) HIERARCHICAL ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) -LAYOUT(HASHED_ARRAY()) +LAYOUT(HASHED_ARRAY({{ dictionary_config }})) LIFETIME(0); SELECT 'HashedArray dictionary'; @@ -82,6 +82,8 @@ SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number, 1) FROM DROP DICTIONARY hierachical_hashed_array_dictionary; +{% endfor %} + DROP DICTIONARY IF EXISTS hierachical_cache_dictionary; CREATE DICTIONARY hierachical_cache_dictionary ( diff --git a/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 index ea979506e07..67e8f098217 100644 --- a/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 +++ b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 @@ -14,6 +14,7 @@ SET max_memory_usage='4Mi'; 'FLAT(INITIAL_ARRAY_SIZE 3_000_000 MAX_ARRAY_SIZE 3_000_000)', 'HASHED()', 'HASHED_ARRAY()', + 'HASHED_ARRAY(SHARDS 2)', 'SPARSE_HASHED()', 'SPARSE_HASHED(SHARDS 2 /* shards are special, they use threads */)', ] %} From 1960713176777cc75a082170bbb75ea3b4accdbc Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 13 Dec 2023 17:27:23 +0000 Subject: [PATCH 044/137] add docs --- .../statements/alter/apply-deleted-mask.md | 22 +++++++++++++++++++ .../sql-reference/statements/alter/index.md | 5 +++-- src/Interpreters/MutationsInterpreter.cpp | 6 ++--- 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 docs/en/sql-reference/statements/alter/apply-deleted-mask.md diff --git a/docs/en/sql-reference/statements/alter/apply-deleted-mask.md b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md new file mode 100644 index 00000000000..7a11d66e739 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/apply-deleted-mask.md @@ -0,0 +1,22 @@ +--- +slug: /en/sql-reference/statements/alter/apply-deleted-mask +sidebar_position: 46 +sidebar_label: APPLY DELETED MASK +--- 
+ + # Apply mask of deleted rows + + ``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] APPLY DELETED MASK [IN PARTITION partition_id] + ``` + +The command applies the mask created by [lightweight delete](/docs/en/sql-reference/statements/delete) and forcefully removes rows marked as deleted from disk. This command is a heavyweight mutation and is semantically equivalent to the query ```ALTER TABLE [db].name DELETE WHERE _row_exists = 0```. + +:::note +It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). +::: + +**See also** + +- [Lightweight deletes](/docs/en/sql-reference/statements/delete) +- [Heavyweight deletes](/docs/en/sql-reference/statements/alter/delete.md) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index d28542e0a43..dc6668c7983 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -17,8 +17,9 @@ Most `ALTER TABLE` queries modify table settings or data: - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) - [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md) +- [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md) -:::note +:::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). ::: @@ -59,7 +60,7 @@ For all `ALTER` queries, you can use the [alter_sync](/docs/en/operations/settin You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting. -:::note +:::note For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
::: diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index a492ea266cf..bf50766c165 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -269,9 +269,9 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) std::make_shared(Field(0))); if (command.predicate) - alter_command->predicate = makeASTFunction("and", row_exists_predicate, command.predicate); - else - alter_command->predicate = row_exists_predicate; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Mutation command APPLY DELETED MASK does not support WHERE clause"); + + alter_command->predicate = row_exists_predicate; auto mutation_command = MutationCommand::parse(alter_command.get()); if (!mutation_command) From f4e2933f5d27afaa3082b60844ccb2877587aae2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 13 Dec 2023 18:08:41 +0000 Subject: [PATCH 045/137] use storage uuid instead of table name as a key in storage info because there can be multiple dropped tables with the same name --- .../System/StorageSystemDroppedTablesParts.cpp | 7 ++++++- src/Storages/System/StorageSystemPartsBase.cpp | 8 +++++++- src/Storages/System/StorageSystemPartsBase.h | 11 ++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index ac8c65def8e..f728c16f1cb 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -36,6 +36,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que MutableColumnPtr table_column_mut = ColumnString::create(); MutableColumnPtr engine_column_mut = ColumnString::create(); MutableColumnPtr active_column_mut = ColumnUInt8::create(); + MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); @@ -47,6 +48,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que if (!storage) continue; + UUID storage_uuid = storage->getStorageID().uuid; String database_name = storage->getStorageID().getDatabaseName(); String table_name = storage->getStorageID().getTableName(); String engine_name = storage->getName(); @@ -63,7 +65,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - storages[std::make_pair(database_name, table_name)] = storage; + storages[storage_uuid] = storage; /// Add all combinations of flag 'active'.
for (UInt64 active : {0, 1}) @@ -72,6 +74,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que table_column_mut->insert(table_name); engine_column_mut->insert(engine_name); active_column_mut->insert(active); + storage_uuid_column_mut->insert(storage_uuid); } } @@ -79,6 +82,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); if (block_to_filter.rows()) { @@ -90,6 +94,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & que database_column = block_to_filter.getByName("database").column; table_column = block_to_filter.getByName("table").column; active_column = block_to_filter.getByName("active").column; + storage_uuid_column = block_to_filter.getByName("uuid").column; } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index bf476c3da71..5befb92adee 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte MutableColumnPtr table_column_mut = ColumnString::create(); MutableColumnPtr engine_column_mut = ColumnString::create(); MutableColumnPtr active_column_mut = ColumnUInt8::create(); + MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); @@ -139,6 +141,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte continue; String engine_name = storage->getName(); + UUID storage_uuid = storage->getStorageID().uuid; #if USE_MYSQL if (auto * proxy = dynamic_cast(storage.get())) @@ -153,7 +156,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - storages[std::make_pair(database_name, iterator->name())] = storage; + storages[storage_uuid] = storage; /// Add all combinations of flag 'active'. 
for (UInt64 active : {0, 1}) @@ -161,6 +164,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte table_column_mut->insert(table_name); engine_column_mut->insert(engine_name); active_column_mut->insert(active); + storage_uuid_column_mut->insert(storage_uuid); } offsets[i] += 2; @@ -178,6 +182,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); + block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); if (rows) { @@ -189,6 +194,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte database_column = block_to_filter.getByName("database").column; table_column = block_to_filter.getByName("table").column; active_column = block_to_filter.getByName("active").column; + storage_uuid_column = block_to_filter.getByName("uuid").column; } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 6ece6ae25b8..f3af029d43e 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -44,11 +44,11 @@ public: info.database = (*database_column)[next_row].get(); info.table = (*table_column)[next_row].get(); + UUID storage_uuid = (*storage_uuid_column)[next_row].get(); - auto is_same_table = [&info, this] (size_t row) -> bool + auto is_same_table = [&storage_uuid, this] (size_t row) -> bool { - return (*database_column)[row].get() == info.database && - (*table_column)[row].get() == info.table; + return (*storage_uuid_column)[row].get() == storage_uuid; }; /// We may have two rows per table which differ in 'active' value. 
@@ -61,7 +61,7 @@ public: info.need_inactive_parts = true; } - info.storage = storages.at(std::make_pair(info.database, info.table)); + info.storage = storages.at(storage_uuid); if (needsLock) { @@ -94,11 +94,12 @@ protected: ColumnPtr database_column; ColumnPtr table_column; ColumnPtr active_column; + ColumnPtr storage_uuid_column; size_t next_row; size_t rows; - using StoragesMap = std::map, StoragePtr>; + using StoragesMap = std::unordered_map; StoragesMap storages; bool needsLock = true; From 441050adceebf9d0ac6cded1b29f57c6d1d215d3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 13 Dec 2023 22:26:38 +0000 Subject: [PATCH 046/137] fix style, cleanup --- .../System/StorageSystemDroppedTablesParts.cpp | 13 +------------ src/Storages/System/StorageSystemPartsBase.cpp | 5 ----- src/Storages/System/StorageSystemPartsBase.h | 5 +++++ 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index f728c16f1cb..2e8d45119f5 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -2,20 +2,9 @@ #include #include #include -#include -#include -#include - -#include #include -#include -#include -#include -#include #include -#include -#include -#include + namespace DB diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 5befb92adee..3c00001d46c 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -23,11 +23,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) { bool has_state_column = false; diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index f3af029d43e..fd20b0756b2 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + class Context; struct StoragesInfo From de755202dd5703e10a7d97472078ce2c5d0b5379 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 13 Dec 2023 18:32:54 -0500 Subject: [PATCH 047/137] Create dropped_tables_parts.md --- .../system-tables/dropped_tables_parts.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 docs/en/operations/system-tables/dropped_tables_parts.md diff --git a/docs/en/operations/system-tables/dropped_tables_parts.md b/docs/en/operations/system-tables/dropped_tables_parts.md new file mode 100644 index 00000000000..095f35287fe --- /dev/null +++ b/docs/en/operations/system-tables/dropped_tables_parts.md @@ -0,0 +1,14 @@ +--- +slug: /en/operations/system-tables/dropped_tables_parts +--- +# dropped_tables_parts {#system_tables-dropped_tables_parts} + +Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) dropped tables from [system.dropped_tables](./dropped_tables.md) + +The schema of this table is the same as [system.parts](./parts.md) + +**See Also** + +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) +- [system.parts](./parts.md) +- [system.dropped_tables](./dropped_tables.md) From 9e1d583f54b4926e9e191510b24cb7c0a073761f Mon 
Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 13 Dec 2023 23:36:54 +0000 Subject: [PATCH 048/137] fix style --- src/Storages/System/StorageSystemDroppedTablesParts.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index 2e8d45119f5..9253cc99d72 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -6,7 +6,6 @@ #include - namespace DB { From 4ff6d47304c21d001d5c8349104a39011edf1d5a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Dec 2023 14:35:58 +0000 Subject: [PATCH 049/137] fix some tests --- .../00133_long_shard_memory_tracker_and_exception_safety.sh | 4 ++-- .../01291_distributed_low_cardinality_memory_efficient.sql | 5 +++++ .../0_stateless/02178_column_function_insert_from.sql | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index e6e82e8c976..a42fd58190a 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -15,8 +15,8 @@ if [ -n "$DBMS_TESTS_UNDER_VALGRIND" ]; then STEP_MULTIPLIER=1000 fi -for i in $(seq 1000000 $((20000 * $STEP_MULTIPLIER)) 10000000 && seq 10100000 $((100000 * $STEP_MULTIPLIER)) 100000000); do - $CLICKHOUSE_CLIENT --max_memory_usage="$i" --query=" +for i in $(seq 1000000 $((20000 * $STEP_MULTIPLIER)) 10000000 && seq 10100000 $((100000 * $STEP_MULTIPLIER)) 50000000); do + $CLICKHOUSE_CLIENT --max_memory_usage="$i" --max_bytes_before_external_group_by 0 --query=" SELECT intDiv(number, 5) AS k, max(toString(number)) FROM remote('127.0.0.{2,3}', ${CLICKHOUSE_DATABASE}.numbers_100k) GROUP BY k ORDER BY k LIMIT 1; " 2> /dev/null; CODE=$?; diff --git a/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql b/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql index 267f5585705..3697a167989 100644 --- a/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql +++ b/tests/queries/0_stateless/01291_distributed_low_cardinality_memory_efficient.sql @@ -6,7 +6,12 @@ DROP TABLE IF EXISTS dist; create table data (key String) Engine=Memory(); create table dist (key LowCardinality(String)) engine=Distributed(test_cluster_two_shards, currentDatabase(), data); insert into data values ('foo'); + set distributed_aggregation_memory_efficient=1; + +-- There is an obscure bug in rare corner case. +set max_bytes_before_external_group_by = 0; + select * from dist group by key; DROP TABLE data; diff --git a/tests/queries/0_stateless/02178_column_function_insert_from.sql b/tests/queries/0_stateless/02178_column_function_insert_from.sql index 13d1ebb4788..dc7c134b6f9 100644 --- a/tests/queries/0_stateless/02178_column_function_insert_from.sql +++ b/tests/queries/0_stateless/02178_column_function_insert_from.sql @@ -8,6 +8,9 @@ INSERT INTO TESTTABLE values (0,'0',['1']), (1,'1',['1']); SET max_threads = 1; +-- There is a bug which is fixed in new analyzer. 
+SET max_bytes_before_external_group_by = 0; + SELECT attr, _id, arrayFilter(x -> (x IN (select '1')), attr_list) z FROM TESTTABLE ARRAY JOIN z AS attr ORDER BY _id LIMIT 3 BY attr; From b4bef78e639a79335e35b49ba153e82de086bc1e Mon Sep 17 00:00:00 2001 From: joelynch Date: Thu, 14 Dec 2023 17:07:26 +0100 Subject: [PATCH 050/137] bugfix: addresses_expr ignored for psql named collections --- src/Databases/DatabaseFactory.cpp | 4 ++-- src/Storages/StoragePostgreSQL.cpp | 10 ++++++++-- src/Storages/StoragePostgreSQL.h | 2 +- .../configs/named_collections.xml | 3 +-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 53d5245770e..a967ecf67c6 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -323,7 +323,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false); use_table_cache = named_collection->getOrDefault("use_table_cache", 0); } else @@ -386,7 +386,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false); } else { diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 7961c44e844..a97104a5a68 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -456,7 +456,7 @@ SinkToStoragePtr StoragePostgreSQL::write( return std::make_shared(metadata_snapshot, pool->get(), remote_table_name, remote_table_schema, on_conflict); } -StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult(const NamedCollection & named_collection, bool require_table) +StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult(const NamedCollection & named_collection, ContextPtr context_, bool require_table) { StoragePostgreSQL::Configuration configuration; ValidateKeysMultiset required_arguments = {"user", "username", "password", "database", "db"}; @@ -473,6 +473,12 @@ StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult configuration.port = static_cast(named_collection.get("port")); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; } + else + { + size_t max_addresses = context_->getSettingsRef().glob_expansion_max_elements; + configuration.addresses = parseRemoteDescriptionForExternalDatabase( + configuration.addresses_expr, max_addresses, 5432); + } configuration.username = named_collection.getAny({"username", "user"}); configuration.password = named_collection.get("password"); @@ -490,7 +496,7 @@ StoragePostgreSQL::Configuration StoragePostgreSQL::getConfiguration(ASTs engine StoragePostgreSQL::Configuration configuration; if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) { - configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection); + configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context); } else { diff --git 
a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index fb8b5a22df2..725a935aa46 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -65,7 +65,7 @@ public: static Configuration getConfiguration(ASTs engine_args, ContextPtr context); - static Configuration processNamedCollectionResult(const NamedCollection & named_collection, bool require_table = true); + static Configuration processNamedCollectionResult(const NamedCollection & named_collection, ContextPtr context_, bool require_table = true); static ColumnsDescription getTableStructureFromData( const postgres::PoolWithFailoverPtr & pool_, diff --git a/tests/integration/test_storage_postgresql/configs/named_collections.xml b/tests/integration/test_storage_postgresql/configs/named_collections.xml index 129225f36b9..ebe9f7ce9ce 100644 --- a/tests/integration/test_storage_postgresql/configs/named_collections.xml +++ b/tests/integration/test_storage_postgresql/configs/named_collections.xml @@ -16,8 +16,7 @@ postgres mysecretpassword - postgres1 - 1111 + postgres1:1111 postgres test_table
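To illustrate the configuration this fix targets, here is a minimal sketch assuming a hypothetical named collection `postgres_nc` and illustrative column names; it mirrors the test config above, where only `addresses_expr` is given (no `host`/`port`), which was previously ignored by `processNamedCollectionResult` and is now expanded through `parseRemoteDescriptionForExternalDatabase`.

```sql
-- Hypothetical named collection that specifies the server via addresses_expr
-- instead of separate host/port keys (mirrors the test config above).
CREATE NAMED COLLECTION postgres_nc AS
    user = 'postgres',
    password = 'mysecretpassword',
    database = 'postgres',
    table = 'test_table',
    addresses_expr = 'postgres1:1111';

-- With the fix, addresses_expr is parsed into the address list (port 5432 is the
-- default when omitted), so the engine connects to postgres1:1111 instead of
-- silently dropping the address.
CREATE TABLE postgres_nc_proxy
(
    id UInt32,
    value String
)
ENGINE = PostgreSQL(postgres_nc);
```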
From 6e6eb5c01efd64b7a725ec6d5dd3c6d98798f0ab Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Fri, 15 Dec 2023 06:49:12 +0000 Subject: [PATCH 051/137] fast get default enum serialization Signed-off-by: Duc Canh Le --- src/DataTypes/DataTypeEnum.cpp | 2 +- .../Serializations/SerializationEnum.cpp | 24 +++++++++---------- .../Serializations/SerializationEnum.h | 24 +++++++++++++++---- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index e5efb73cfca..a1d5e4b39b7 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -170,7 +170,7 @@ bool DataTypeEnum::contains(const IDataType & rhs) const template SerializationPtr DataTypeEnum::doGetDefaultSerialization() const { - return std::make_shared>(this->getValues()); + return std::make_shared>(std::static_pointer_cast>(shared_from_this())); } diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index 09b0b02714c..ee427bc1043 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -11,13 +11,13 @@ namespace DB template void SerializationEnum::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeString(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template void SerializationEnum::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + writeEscapedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template @@ -30,14 +30,14 @@ void SerializationEnum::deserializeTextEscaped(IColumn & column, ReadBuffe /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. 
std::string field_name; readEscapedString(field_name, istr); - assert_cast(column).getData().push_back(this->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); } } template void SerializationEnum::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeQuotedString(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeQuotedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template @@ -45,7 +45,7 @@ void SerializationEnum::deserializeTextQuoted(IColumn & column, ReadBuffer { std::string field_name; readQuotedStringWithSQLStyle(field_name, istr); - assert_cast(column).getData().push_back(this->getValue(StringRef(field_name))); + assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name))); } template @@ -61,20 +61,20 @@ void SerializationEnum::deserializeWholeText(IColumn & column, ReadBuffer { std::string field_name; readStringUntilEOF(field_name, istr); - assert_cast(column).getData().push_back(this->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); } } template void SerializationEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - writeJSONString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr, settings); + writeJSONString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr, settings); } template void SerializationEnum::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeXMLStringForTextElement(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + writeXMLStringForTextElement(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template @@ -86,14 +86,14 @@ void SerializationEnum::deserializeTextJSON(IColumn & column, ReadBuffer & { std::string field_name; readJSONString(field_name, istr); - assert_cast(column).getData().push_back(this->getValue(StringRef(field_name))); + assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name))); } } template void SerializationEnum::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeCSVString(this->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeCSVString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template @@ -105,7 +105,7 @@ void SerializationEnum::deserializeTextCSV(IColumn & column, ReadBuffer & { std::string field_name; readCSVString(field_name, istr, settings.csv); - assert_cast(column).getData().push_back(this->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); } } @@ -114,7 +114,7 @@ void SerializationEnum::serializeTextMarkdown( const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (settings.markdown.escape_special_characters) - writeMarkdownEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + writeMarkdownEscapedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); else 
serializeTextEscaped(column, row_num, ostr, settings); } diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index 49a0e4943e0..8611617de06 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -1,20 +1,32 @@ #pragma once +#include #include #include +#include namespace DB { template -class SerializationEnum : public SerializationNumber, public EnumValues +class SerializationEnum : public SerializationNumber { public: using typename SerializationNumber::FieldType; using typename SerializationNumber::ColumnType; - using typename EnumValues::Values; + using Values = EnumValues::Values; - explicit SerializationEnum(const Values & values_) : EnumValues(values_) {} + SerializationEnum() = delete; + /// To explicitly create a SerializationEnum from Values + explicit SerializationEnum(const Values & values_) : own_enum_values(values_), ref_enum_values(&own_enum_values.value()) { } + /// To create a SerializationEnum from an IDataType instance, will reuse EnumValues from the type + /// Motivation: some Enum type has many elements, and building EnumValues is not trivial + /// This constructor allow to create many SerializationEnum from same IDataType without rebuilding + /// EnumValues for every call, so it's useful to get default serialization. + explicit SerializationEnum(const std::shared_ptr> & enum_type) + : own_enum_type(enum_type), ref_enum_values(static_cast *>(enum_type.get())) + { + } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -35,8 +47,12 @@ public: { FieldType x; readText(x, istr); - return this->findByValue(x)->first; + return ref_enum_values->findByValue(x)->first; } + + std::optional> own_enum_values; + std::shared_ptr> own_enum_type; + const EnumValues * ref_enum_values; }; } From 8a5ffbb5f249bd83e9c8e960785f7f76b0a2d0bf Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 14 Dec 2023 12:45:01 +0300 Subject: [PATCH 052/137] InterpreterCreateQuery sample block fix --- src/Interpreters/InterpreterCreateQuery.cpp | 39 +++++++------- .../Transforms/buildPushingToViewsChain.cpp | 2 +- ...ery_interpreter_sample_block_fix.reference | 9 ++++ ...ate_query_interpreter_sample_block_fix.sql | 52 +++++++++++++++++++ 4 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.reference create mode 100644 tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 2b60b0b7b47..98f52d9037d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -786,10 +786,23 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else { - as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), - getContext(), - false /* is_subquery */, - create.isParameterizedView()); + /** To get valid sample block we need to prepare query without only_analyze, because we need to execute scalar + * subqueries. Otherwise functions that expect only constant arguments will throw error during query analysis, + * because the result of scalar subquery is not a constant. 
+ * + * Example: + * CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr + * AS + * WITH (SELECT '\d[a-z]') AS constant_value + * SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr + * FROM test_table; + * + * For new analyzer this issue does not exists because we always execute scalar subqueries. + * We can improve this in new analyzer, and execute scalar subqueries only in contexts when we expect constant + * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. + */ + InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), {}); + as_select_sample = interpreter.getSampleBlock(); } properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); @@ -1213,28 +1226,14 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) getContext() )) { - Block input_block; - - if (getContext()->getSettingsRef().allow_experimental_analyzer) - { - input_block = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); - } - else - { - input_block = InterpreterSelectWithUnionQuery(create.select->clone(), - getContext(), - SelectQueryOptions().analyze()).getSampleBlock(); - } - Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); - ColumnsWithTypeAndName input_columns; ColumnsWithTypeAndName output_columns; - for (const auto & input_column : input_block) + for (const auto & input_column : properties.columns) { if (const auto * output_column = output_block.findByName(input_column.name)) { - input_columns.push_back(input_column.cloneEmpty()); + input_columns.push_back({input_column.type, input_column.name}); output_columns.push_back(output_column->cloneEmpty()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 781a162d2bd..f85dc28f4c7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -341,7 +341,7 @@ Chain buildPushingToViewsChain( if (select_context->getSettingsRef().allow_experimental_analyzer) header = InterpreterSelectQueryAnalyzer::getSampleBlock(query, select_context); else - header = InterpreterSelectQuery(query, select_context, SelectQueryOptions().analyze()).getSampleBlock(); + header = InterpreterSelectQuery(query, select_context, SelectQueryOptions()).getSampleBlock(); /// Insert only columns returned by select. 
Names insert_columns; diff --git a/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.reference b/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.reference new file mode 100644 index 00000000000..a0226ef9dd7 --- /dev/null +++ b/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.reference @@ -0,0 +1,9 @@ +['0a'] +-- +['0a'] +['1a'] +-- +['0a'] +-- +['0a'] +['1a'] diff --git a/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.sql b/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.sql new file mode 100644 index 00000000000..0262393fd48 --- /dev/null +++ b/tests/queries/0_stateless/02943_create_query_interpreter_sample_block_fix.sql @@ -0,0 +1,52 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + number UInt64 +) +ENGINE=MergeTree ORDER BY number; + +DROP VIEW IF EXISTS test_mv; +CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr +AS +WITH (SELECT '\d[a-z]') AS constant_value +SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr +FROM test_table; + +INSERT INTO test_table VALUES (0); +SELECT * FROM test_mv ORDER BY arr; + +SELECT '--'; + +INSERT INTO test_table VALUES (1); +SELECT * FROM test_mv ORDER BY arr; + +SELECT '--'; + +TRUNCATE test_table; + +DROP TABLE IF EXISTS regex_test_table; +CREATE TABLE regex_test_table +( + regex String +) +ENGINE = MergeTree ORDER BY regex; + +INSERT INTO regex_test_table VALUES ('\d[a-z]'); + +DROP VIEW test_mv; +CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr +AS +WITH (SELECT regex FROM regex_test_table) AS constant_value +SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr +FROM test_table; + +INSERT INTO test_table VALUES (0); +SELECT * FROM test_mv ORDER BY arr; + +SELECT '--'; + +INSERT INTO test_table VALUES (1); +SELECT * FROM test_mv ORDER BY arr; + +DROP VIEW test_mv; +DROP TABLE test_table; From 0dcb3133c7591d81a9901c879699fd836dca7e08 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 15 Dec 2023 13:54:58 +0100 Subject: [PATCH 053/137] Allow to dynamically change fs cache size --- src/Interpreters/Cache/FileCache.cpp | 76 +++++++++++++----- src/Interpreters/Cache/FileCacheFactory.cpp | 32 +++++--- src/Interpreters/Cache/FileCacheFactory.h | 2 + src/Interpreters/Cache/IFileCachePriority.h | 10 ++- .../Cache/LRUFileCachePriority.cpp | 52 +++++++++++- src/Interpreters/Cache/LRUFileCachePriority.h | 2 + src/Interpreters/Cache/Metadata.cpp | 7 +- .../Cache/SLRUFileCachePriority.cpp | 22 +++++- .../Cache/SLRUFileCachePriority.h | 5 +- tests/config/config.d/storage_conf.xml | 11 +++ ...lly_change_filesystem_cache_size.reference | 20 +++++ ...ynamically_change_filesystem_cache_size.sh | 79 +++++++++++++++++++ 12 files changed, 272 insertions(+), 46 deletions(-) create mode 100644 tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference create mode 100755 tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index b6a3a0dadc5..0e799df1615 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -707,7 +707,7 @@ KeyMetadata::iterator FileCache::addFileSegment( stash_records.emplace( stash_key, stash->queue->add(locked_key.getKeyMetadata(), offset, 0, *lock)); - if (stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit()) + if 
(stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit(*lock)) stash->queue->pop(*lock); result_state = FileSegment::State::DETACHED; @@ -748,7 +748,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa LOG_TEST( log, "Trying to reserve space ({} bytes) for {}:{}, current usage {}/{}", size, file_segment.key(), file_segment.offset(), - main_priority->getSize(cache_lock), main_priority->getSizeLimit()); + main_priority->getSize(cache_lock), main_priority->getSizeLimit(cache_lock)); /// In case of per query cache limit (by default disabled), we add/remove entries from both /// (main_priority and query_priority) priority queues, but iterate entries in order of query_priority, @@ -760,7 +760,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa { query_priority = &query_context->getPriority(); - const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit(); + const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit(cache_lock); if (query_limit_exceeded && !query_context->recacheOnFileCacheQueryLimitExceeded()) { LOG_TEST(log, "Query limit exceeded, space reservation failed, " @@ -771,7 +771,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa LOG_TEST( log, "Using query limit, current usage: {}/{} (while reserving for {}:{})", - query_priority->getSize(cache_lock), query_priority->getSizeLimit(), + query_priority->getSize(cache_lock), query_priority->getSizeLimit(cache_lock), file_segment.key(), file_segment.offset()); } @@ -1057,9 +1057,11 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) bool limits_satisfied; IFileCachePriority::IteratorPtr cache_it; + size_t size_limit = 0; { auto lock = lockCache(); + size_limit = main_priority->getSizeLimit(lock); limits_satisfied = main_priority->canFit(size, lock); if (limits_satisfied) @@ -1109,7 +1111,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) log, "Cache capacity changed (max size: {}), " "cached file `{}` does not fit in cache anymore (size: {})", - main_priority->getSizeLimit(), offset_it->path().string(), size); + size_limit, offset_it->path().string(), size); fs::remove(offset_it->path()); } @@ -1215,7 +1217,8 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, std::lock_guard lock(apply_settings_mutex); - if (metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit)) + if (new_settings.background_download_queue_size_limit != actual_settings.background_download_queue_size_limit + && metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit)) { LOG_INFO(log, "Changed background_download_queue_size from {} to {}", actual_settings.background_download_queue_size_limit, @@ -1224,24 +1227,57 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, actual_settings.background_download_queue_size_limit = new_settings.background_download_queue_size_limit; } - bool updated; - try + if (new_settings.background_download_threads != actual_settings.background_download_threads) { - updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads); - } - catch (...) 
- { - actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads(); - throw; + bool updated = false; + try + { + updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads); + } + catch (...) + { + actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads(); + throw; + } + + if (updated) + { + LOG_INFO(log, "Changed background_download_threads from {} to {}", + actual_settings.background_download_threads, + new_settings.background_download_threads); + + actual_settings.background_download_threads = new_settings.background_download_threads; + } } - if (updated) - { - LOG_INFO(log, "Changed background_download_threads from {} to {}", - actual_settings.background_download_threads, - new_settings.background_download_threads); - actual_settings.background_download_threads = new_settings.background_download_threads; + if (new_settings.max_size != actual_settings.max_size + || new_settings.max_elements != actual_settings.max_elements) + { + auto cache_lock = lockCache(); + + bool updated = false; + try + { + updated = main_priority->modifySizeLimits( + new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); + } + catch (...) + { + actual_settings.max_size = main_priority->getSizeLimit(cache_lock); + actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); + throw; + } + + if (updated) + { + LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); + + actual_settings.max_size = main_priority->getSizeLimit(cache_lock); + actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); + } } } diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index 9ba96de26dc..84eafde9afd 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -25,6 +25,12 @@ FileCacheSettings FileCacheFactory::FileCacheData::getSettings() const return settings; } +void FileCacheFactory::FileCacheData::setSettings(const FileCacheSettings & new_settings) +{ + std::lock_guard lock(settings_mutex); + settings = new_settings; +} + FileCacheFactory & FileCacheFactory::instance() { static FileCacheFactory ret; @@ -100,21 +106,23 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig FileCacheSettings new_settings; new_settings.loadFromConfig(config, cache_info->config_path); - FileCacheSettings old_settings; - { - std::lock_guard lock(cache_info->settings_mutex); - if (new_settings == cache_info->settings) - continue; + FileCacheSettings old_settings = cache_info->getSettings(); + if (old_settings == new_settings) + continue; - old_settings = cache_info->settings; + try + { + cache_info->cache->applySettingsIfPossible(new_settings, old_settings); + } + catch (...) + { + /// Settings changes could be partially applied in case of exception, + /// make sure cache_info->settings show correct state of applied settings. 
+ cache_info->setSettings(old_settings); + throw; } - cache_info->cache->applySettingsIfPossible(new_settings, old_settings); - - { - std::lock_guard lock(cache_info->settings_mutex); - cache_info->settings = old_settings; - } + cache_info->setSettings(old_settings); } } diff --git a/src/Interpreters/Cache/FileCacheFactory.h b/src/Interpreters/Cache/FileCacheFactory.h index 2148e520fd1..c60b247005b 100644 --- a/src/Interpreters/Cache/FileCacheFactory.h +++ b/src/Interpreters/Cache/FileCacheFactory.h @@ -24,6 +24,8 @@ public: FileCacheSettings getSettings() const; + void setSettings(const FileCacheSettings & new_settings); + const FileCachePtr cache; const std::string config_path; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 0f407a3082c..52de36849ae 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -55,9 +55,9 @@ public: virtual ~IFileCachePriority() = default; - size_t getElementsLimit() const { return max_elements; } + size_t getElementsLimit(const CacheGuard::Lock &) const { return max_elements; } - size_t getSizeLimit() const { return max_size; } + size_t getSizeLimit(const CacheGuard::Lock &) const { return max_size; } virtual size_t getSize(const CacheGuard::Lock &) const = 0; @@ -86,9 +86,11 @@ public: FinalizeEvictionFunc & finalize_eviction_func, const CacheGuard::Lock &) = 0; + virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) = 0; + protected: - const size_t max_size = 0; - const size_t max_elements = 0; + size_t max_size = 0; + size_t max_elements = 0; }; } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index a6abaea11c3..ba9d624ca9b 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -16,6 +16,9 @@ namespace ProfileEvents { extern const Event FilesystemCacheEvictionSkippedFileSegments; extern const Event FilesystemCacheEvictionTries; + extern const Event FilesystemCacheEvictMicroseconds; + extern const Event FilesystemCacheEvictedBytes; + extern const Event FilesystemCacheEvictedFileSegments; } namespace DB @@ -36,7 +39,7 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT return std::make_shared(add(Entry(key_metadata->key, offset, size, key_metadata), lock)); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock & lock) { if (entry.size == 0) { @@ -59,7 +62,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, cons } #endif - const auto & size_limit = getSizeLimit(); + const auto & size_limit = getSizeLimit(lock); if (size_limit && current_size + entry.size > size_limit) { throw Exception( @@ -288,6 +291,51 @@ std::vector LRUFileCachePriority::dump(FileCache & cache, const return res; } +bool LRUFileCachePriority::modifySizeLimits( + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CacheGuard::Lock & lock) +{ + if (max_size == max_size_ && max_elements == max_elements_) + return false; /// Nothing to change. 
+ + auto check_limits_satisfied = [&]() + { + return (max_size_ == 0 || current_size <= max_size_) + && (max_elements_ == 0 || current_elements_num <= max_elements_); + }; + + if (check_limits_satisfied()) + { + max_size = max_size_; + max_elements = max_elements_; + return true; + } + + auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { + chassert(segment_metadata->file_segment->assertCorrectness()); + + if (!segment_metadata->releasable()) + return IterationResult::CONTINUE; + + auto segment = segment_metadata->file_segment; + locked_key.removeFileSegment(segment->offset(), segment->lock()); + + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize()); + return IterationResult::REMOVE_AND_CONTINUE; + }; + + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); + iterate( + [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { return check_limits_satisfied() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, + lock); + + max_size = max_size_; + max_elements = max_elements_; + return true; +} + void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock) { assertValid(); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 5ff6c61eb4d..2450ecbff39 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -48,6 +48,8 @@ public: void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); } + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + private: void updateElementsCount(int64_t num); void updateSize(int64_t size); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 08c6151f1cb..f4aa7cc7ab1 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -681,7 +681,7 @@ void CacheMetadata::startup() download_threads.emplace_back(std::make_shared()); download_threads.back()->thread = std::make_unique([this, thread = download_threads.back()] { downloadThreadFunc(thread->stop_flag); }); } - cleanup_thread = std::make_unique(std::function{ [this]{ cleanupThreadFunc(); }}); + cleanup_thread = std::make_unique([this]{ cleanupThreadFunc(); }); } void CacheMetadata::shutdown() @@ -708,10 +708,10 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num) if (threads_num == download_threads_num) return false; + SCOPE_EXIT({ download_threads_num = download_threads.size(); }); + if (threads_num > download_threads_num) { - SCOPE_EXIT({ download_threads_num = download_threads.size(); }); - size_t add_threads = threads_num - download_threads_num; for (size_t i = 0; i < add_threads; ++i) { @@ -739,7 +739,6 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num) } download_queue->cv.notify_all(); - SCOPE_EXIT({ download_threads_num = download_threads.size(); }); for (size_t i = 0; i < remove_threads; ++i) { diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 71b8d44d438..56af33b98f3 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -21,14 +21,15 @@ namespace 
SLRUFileCachePriority::SLRUFileCachePriority( size_t max_size_, size_t max_elements_, - double size_ratio) + double size_ratio_) : IFileCachePriority(max_size_, max_elements_) + , size_ratio(size_ratio_) , protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio))) , probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio))) { LOG_DEBUG( log, "Using probationary queue size: {}, protected queue size: {}", - probationary_queue.getSizeLimit(), protected_queue.getSizeLimit()); + probationary_queue.max_size, protected_queue.max_elements); } size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const @@ -151,7 +152,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach /// Entry is in probationary queue. /// We need to move it to protected queue. const size_t size = iterator.getEntry().size; - if (size > protected_queue.getSizeLimit()) + if (size > protected_queue.getSizeLimit(lock)) { /// Entry size is bigger than the whole protected queue limit. /// This is only possible if protected_queue_size_limit is less than max_file_segment_size, @@ -235,6 +236,21 @@ void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock) probationary_queue.shuffle(lock); } +bool SLRUFileCachePriority::modifySizeLimits( + size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock & lock) +{ + if (max_size == max_size_ && max_elements == max_elements_ && size_ratio == size_ratio_) + return false; /// Nothing to change. + + protected_queue.modifySizeLimits(getRatio(max_size_, size_ratio_), getRatio(max_elements_, size_ratio_), 0, lock); + probationary_queue.modifySizeLimits(getRatio(max_size_, 1 - size_ratio_), getRatio(max_elements_, 1 - size_ratio_), 0, lock); + + max_size = max_size_; + max_elements = max_elements_; + size_ratio = size_ratio_; + return true; +} + SLRUFileCachePriority::SLRUIterator::SLRUIterator( SLRUFileCachePriority * cache_priority_, LRUFileCachePriority::LRUIterator && lru_iterator_, diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index 45fc7ad8333..373e37a8fc9 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -18,7 +18,7 @@ private: public: class SLRUIterator; - SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio); + SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio_); size_t getSize(const CacheGuard::Lock & lock) const override; @@ -45,7 +45,10 @@ public: std::vector dump(FileCache & cache, const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + private: + double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority"); diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 18652826d83..72248f668a7 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -29,6 +29,17 @@ 0 0 + + cache + s3disk + s3_cache_02944/ + 100 + 100 + 10 + 10 + 100 + 0 + local_blob_storage diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference new file 
mode 100644 index 00000000000..7fa32ec2b09 --- /dev/null +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -0,0 +1,20 @@ +100 10 10 10 0 0 98 10 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +0 +10 +98 +set max_size from 100 to 10 +10 10 10 10 0 0 8 1 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +1 +8 +set max_size from 10 to 100 +100 10 10 10 0 0 8 1 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +10 +98 +set max_elements from 10 to 2 +100 2 10 10 0 0 18 2 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +2 +18 +set max_elements from 2 to 10 +100 10 10 10 0 0 18 2 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +10 +98 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh new file mode 100755 index 00000000000..021493eaa82 --- /dev/null +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +disk_name="s3_cache_02944" + +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String) engine=MergeTree() ORDER BY tuple() SETTINGS disk = '$disk_name'; +INSERT INTO test SELECT randomString(100); +SYSTEM DROP FILESYSTEM CACHE; +" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path_tmp=$config_path.tmp + +echo 'set max_size from 100 to 10' +cat $config_path \ +| sed "s|100<\/max_size>|10<\/max_size>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_size from 10 to 100' +cat $config_path \ +| sed "s|10<\/max_size>|100<\/max_size>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_elements from 10 to 2' +cat $config_path \ +| sed "s|10<\/max_elements>|2<\/max_elements>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM 
system.filesystem_cache WHERE state = 'DOWNLOADED'" + +echo 'set max_elements from 2 to 10' +cat $config_path \ +| sed "s|2<\/max_elements>|10<\/max_elements>|" \ +> $config_path_tmp +mv $config_path_tmp $config_path + +$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" +$CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" From 6b4658ef5267dc166a9e43162f095c2ba233fc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 15 Dec 2023 16:00:58 +0100 Subject: [PATCH 054/137] Mysql log format --- src/Common/mysqlxx/Query.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/mysqlxx/Query.cpp b/src/Common/mysqlxx/Query.cpp index 42c35d26ecf..e30ed2b75c8 100644 --- a/src/Common/mysqlxx/Query.cpp +++ b/src/Common/mysqlxx/Query.cpp @@ -6,7 +6,7 @@ #include #endif -#include +#include #include #include @@ -52,8 +52,7 @@ void Query::executeImpl() { MYSQL* mysql_driver = conn->getDriver(); - auto & logger = Poco::Logger::get("mysqlxx::Query"); - logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver)); + LOG_TRACE(&Poco::Logger::get("mysqlxx::Query"), "Running MySQL query using connection {}", mysql_thread_id(mysql_driver)); if (mysql_real_query(mysql_driver, query.data(), query.size())) { const auto err_no = mysql_errno(mysql_driver); From 5b88c4ef059876df95d2a79f810df80f76d0649f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 15 Dec 2023 16:09:46 +0100 Subject: [PATCH 055/137] BackupEntriesCollector format log --- src/Backups/BackupEntriesCollector.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index a335b92fe3e..99e49026f2a 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -43,14 +43,6 @@ namespace Stage = BackupCoordinationStage; namespace { - /// Uppercases the first character of a passed string. 
- String toUpperFirst(const String & str) - { - String res = str; - res[0] = std::toupper(res[0]); - return res; - } - /// Outputs "table " or "temporary table " String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper) { @@ -164,7 +156,7 @@ BackupEntries BackupEntriesCollector::run() Strings BackupEntriesCollector::setStage(const String & new_stage, const String & message) { - LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage))); + LOG_TRACE(log, "Setting stage: {}", new_stage); current_stage = new_stage; backup_coordination->setStage(new_stage, message); From e05cc51ef0a5ad61879919e37a5c9970f1e444f7 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:17:30 +0100 Subject: [PATCH 056/137] Update storage_conf.xml --- tests/config/config.d/storage_conf.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 72248f668a7..7a5caad9139 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -31,7 +31,7 @@ cache - s3disk + s3_disk s3_cache_02944/ 100 100 From a4d287b5767efb0c2587bd46c03d3f6600929515 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Dec 2023 18:54:15 +0300 Subject: [PATCH 057/137] Fixed tests --- src/Interpreters/InterpreterCreateQuery.cpp | 25 ++++++++++++++++--- .../02174_cte_scalar_cache_mv.reference | 4 +-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 98f52d9037d..bf07f4ed3ee 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -801,7 +801,12 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti * We can improve this in new analyzer, and execute scalar subqueries only in contexts when we expect constant * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. 
*/ - InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), {}); + + SelectQueryOptions options; + if (create.isParameterizedView()) + options = options.createParameterizedView(); + + InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), options); as_select_sample = interpreter.getSampleBlock(); } @@ -1226,14 +1231,28 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) getContext() )) { + Block input_block; + + if (getContext()->getSettingsRef().allow_experimental_analyzer) + { + input_block = InterpreterSelectQueryAnalyzer::getSampleBlock(create.select->clone(), getContext()); + } + else + { + input_block = InterpreterSelectWithUnionQuery(create.select->clone(), + getContext(), + {}).getSampleBlock(); + } + Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); + ColumnsWithTypeAndName input_columns; ColumnsWithTypeAndName output_columns; - for (const auto & input_column : properties.columns) + for (const auto & input_column : input_block) { if (const auto * output_column = output_block.findByName(input_column.name)) { - input_columns.push_back({input_column.type, input_column.name}); + input_columns.push_back(input_column.cloneEmpty()); output_columns.push_back(output_column->cloneEmpty()); } } diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference index 055c88160ad..8ec3608317f 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference @@ -18,7 +18,7 @@ 89 89 89 89 5 94 94 94 94 5 99 99 99 99 5 -02177_MV 3 80 26 +02177_MV 7 80 22 10 40 70 @@ -60,4 +60,4 @@ 178 188 198 -02177_MV_3 19 0 2 +02177_MV_3 20 0 1 From e4a831be17f85e345c5c9cd8f00bd22b8e759692 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 15 Dec 2023 17:40:03 +0100 Subject: [PATCH 058/137] normalize function names in CREATE INDEX --- src/Interpreters/InterpreterCreateFunctionQuery.cpp | 2 ++ src/Interpreters/InterpreterCreateIndexQuery.cpp | 2 ++ .../02487_create_index_normalize_functions.reference | 0 .../0_stateless/02487_create_index_normalize_functions.sql | 6 ++++++ 4 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02487_create_index_normalize_functions.reference create mode 100644 tests/queries/0_stateless/02487_create_index_normalize_functions.sql diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 3e87f4fe440..a7b17b1cdcf 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { + FunctionNameNormalizer().visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 3b47a002e50..ed29c82a0f0 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes BlockIO InterpreterCreateIndexQuery::execute() { + 
FunctionNameNormalizer().visit(query_ptr.get()); auto current_context = getContext(); const auto & create_index = query_ptr->as(); diff --git a/tests/queries/0_stateless/02487_create_index_normalize_functions.reference b/tests/queries/0_stateless/02487_create_index_normalize_functions.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02487_create_index_normalize_functions.sql b/tests/queries/0_stateless/02487_create_index_normalize_functions.sql new file mode 100644 index 00000000000..2155f5d6665 --- /dev/null +++ b/tests/queries/0_stateless/02487_create_index_normalize_functions.sql @@ -0,0 +1,6 @@ + +create table rmt (n int, ts DateTime64(8, 'UTC')) engine=ReplicatedMergeTree('/test/02487/{database}/rmt', '1') order by n; +alter table rmt add index idx1 date(ts) TYPE MinMax GRANULARITY 1; +create index idx2 on rmt date(ts) TYPE MinMax GRANULARITY 1; +system restart replica rmt; +create table rmt2 (n int, ts DateTime64(8, 'UTC'), index idx1 date(ts) TYPE MinMax GRANULARITY 1, index idx2 date(ts) TYPE MinMax GRANULARITY 1) engine=ReplicatedMergeTree('/test/02487/{database}/rmt', '2') order by n; From b269f87f4c76671d34aec2e1dc7489b1b53281d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 15 Dec 2023 19:25:49 +0100 Subject: [PATCH 059/137] Better text_log with ErrnoException --- docs/en/development/style.md | 2 +- docs/ru/development/style.md | 2 +- docs/zh/development/style.md | 2 +- programs/benchmark/Benchmark.cpp | 2 +- programs/install/Install.cpp | 10 +-- programs/obfuscator/Obfuscator.cpp | 4 +- programs/su/su.cpp | 10 +-- src/Client/ClientBase.cpp | 18 ++--- src/Common/AlignedBuffer.cpp | 8 ++- src/Common/Allocator.h | 13 ++-- src/Common/ArrayCache.h | 4 +- src/Common/AsynchronousMetrics.cpp | 2 +- src/Common/CounterInFile.h | 6 +- src/Common/Epoll.cpp | 8 +-- src/Common/EventFD.cpp | 6 +- src/Common/Exception.cpp | 34 +++------- src/Common/Exception.h | 67 +++++++++++++++---- src/Common/FiberStack.h | 4 +- src/Common/InterruptListener.h | 11 ++- src/Common/MemoryStatisticsOS.cpp | 12 ++-- src/Common/NetlinkMetricsProvider.cpp | 8 +-- src/Common/PODArray.h | 2 +- src/Common/PipeFDs.cpp | 22 +++--- src/Common/ProcfsMetricsProvider.cpp | 13 ++-- src/Common/QueryProfiler.cpp | 10 +-- src/Common/ShellCommand.cpp | 6 +- src/Common/StatusFile.cpp | 8 +-- src/Common/TerminalSize.cpp | 4 +- src/Common/ThreadFuzzer.cpp | 8 +-- src/Common/TimerDescriptor.cpp | 8 +-- .../assertProcessUserMatchesDataOwner.cpp | 3 +- src/Common/atomicRename.cpp | 17 +++-- src/Common/checkStackSize.cpp | 6 +- src/Common/createHardLink.cpp | 16 +++-- src/Common/examples/arena_with_free_lists.cpp | 2 +- .../examples/thread_creation_latency.cpp | 4 +- src/Common/filesystemHelpers.cpp | 27 +++----- src/Common/hasLinuxCapability.cpp | 2 +- src/Common/isLocalAddress.cpp | 4 +- src/Common/randomSeed.cpp | 2 +- src/Common/remapExecutable.cpp | 2 +- src/Common/setThreadName.cpp | 4 +- src/Daemon/BaseDaemon.cpp | 24 +++++-- src/Databases/DatabaseOnDisk.cpp | 7 +- src/Dictionaries/SSDCacheDictionaryStorage.h | 21 +++--- src/Disks/DiskLocal.cpp | 12 ++-- src/Disks/IO/IOUringReader.cpp | 2 +- src/Disks/IO/ThreadPoolReader.cpp | 7 +- src/Disks/IO/WriteBufferFromTemporaryFile.cpp | 3 +- src/Disks/LocalDirectorySyncGuard.cpp | 6 +- .../Local/LocalObjectStorage.cpp | 2 +- src/Functions/now64.cpp | 2 +- src/Functions/trap.cpp | 2 +- src/IO/AIO.cpp | 4 +- src/IO/AsynchronousReadBufferFromFile.cpp | 10 ++- src/IO/MMapReadBufferFromFile.cpp | 4 +- 
src/IO/MMappedFile.cpp | 4 +- src/IO/MMappedFileDescriptor.cpp | 8 +-- src/IO/OpenedFile.cpp | 4 +- src/IO/ReadBufferFromFile.cpp | 10 ++- src/IO/ReadBufferFromFileDescriptor.cpp | 17 +++-- src/IO/SynchronousReader.cpp | 4 +- src/IO/WriteBufferFromFile.cpp | 5 +- src/IO/WriteBufferFromFileDescriptor.cpp | 13 ++-- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Interpreters/JIT/CHJIT.cpp | 12 ++-- src/Interpreters/ThreadStatusExt.cpp | 2 +- src/Loggers/ExtendedLogChannel.cpp | 2 +- src/Processors/Executors/PollingQueue.cpp | 4 +- src/Processors/Sources/ShellCommandSource.cpp | 10 +-- .../RemoteQueryExecutorReadContext.cpp | 4 +- src/Server/KeeperTCPHandler.cpp | 6 +- src/Server/MySQLHandlerFactory.cpp | 12 ++-- src/Storages/FileLog/DirectoryWatcherBase.cpp | 4 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 3 +- src/Storages/StorageFile.cpp | 10 +-- .../System/StorageSystemStackTrace.cpp | 12 ++-- utils/keeper-bench/Runner.cpp | 2 +- 78 files changed, 353 insertions(+), 306 deletions(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 0b71a669638..77a550f2a0e 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -489,7 +489,7 @@ When using functions with response codes or `errno`, always check the result and ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` You can use assert to check invariant in code. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 49c4aade4e9..cd1297504af 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -493,7 +493,7 @@ catch (const DB::Exception & e) ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` `assert` не используются. 
diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 977b4dce92a..c0a08291e02 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -485,7 +485,7 @@ catch (const DB::Exception & e) ``` cpp if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file {}", file_name); ``` `不要使用断言`。 diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index d6b8b38d84d..ef24eeaa6d7 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -405,7 +405,7 @@ private: || sigaddset(&sig_set, SIGINT) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) { - throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_BLOCK_SIGNAL, "Cannot block signal"); } while (true) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 9d4d791263b..52f30098b38 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -328,7 +328,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fs::create_symlink(binary_self_canonical_path, main_bin_path); if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", binary_self_canonical_path.string()); } } else @@ -361,7 +361,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (already_installed) { if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", main_bin_path.string()); } else { @@ -395,7 +395,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot chmod {}", main_bin_tmp_path.string()); } catch (const Exception & e) { @@ -1122,7 +1122,7 @@ namespace return 0; } else - throwFromErrno(fmt::format("Cannot obtain the status of pid {} with `kill`", pid), ErrorCodes::CANNOT_KILL); + throw ErrnoException(ErrorCodes::CANNOT_KILL, "Cannot obtain the status of pid {} with `kill`", pid); } if (!pid) @@ -1143,7 +1143,7 @@ namespace if (0 == kill(pid, signal)) fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid); else - throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot send {} signal", signal_name); size_t try_num = 0; for (; try_num < max_tries; ++try_num) diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 2cb5250cdf2..7e09d5e8046 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1307,7 +1307,7 @@ try /// stdin must be seekable auto res = lseek(file->getFD(), 0, SEEK_SET); if (-1 == res) - throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw 
ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); SingleReadBufferIterator read_buffer_iterator(std::move(file)); schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const); @@ -1336,7 +1336,7 @@ try /// stdin must be seekable auto res = lseek(file_in.getFD(), 0, SEEK_SET); if (-1 == res) - throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); } Obfuscator obfuscator(header, seed, markov_model_params); diff --git a/programs/su/su.cpp b/programs/su/su.cpp index cebd05b3eb1..a8f61fb32b6 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -56,7 +56,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) group * result{}; if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid); if (!result) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid); @@ -68,7 +68,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group has id 0, but dropping privileges to gid 0 does not make sense"); if (0 != setgid(gid)) - throwFromErrno(fmt::format("Cannot do 'setgid' to user ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'setgid' to user ({})", arg_gid); } if (!arg_uid.empty()) @@ -81,7 +81,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) passwd * result{}; if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid); if (!result) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid); @@ -93,7 +93,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User has id 0, but dropping privileges to uid 0 does not make sense"); if (0 != setuid(uid)) - throwFromErrno(fmt::format("Cannot do 'setuid' to user ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'setuid' to user ({})", arg_uid); } } @@ -136,7 +136,7 @@ try execvp(new_argv.front(), new_argv.data()); - throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot execvp"); } catch (...) 
{ diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a20522993ea..e099aac0de9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -318,14 +318,14 @@ void ClientBase::setupSignalHandler() sigemptyset(&new_act.sa_mask); #else if (sigemptyset(&new_act.sa_mask)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); #endif if (sigaction(SIGINT, &new_act, nullptr)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); if (sigaction(SIGQUIT, &new_act, nullptr)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); } @@ -543,16 +543,16 @@ try if (!pager.empty()) { if (SIG_ERR == signal(SIGPIPE, SIG_IGN)) - throwFromErrno("Cannot set signal handler for SIGPIPE.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGPIPE"); /// We need to reset signals that had been installed in the /// setupSignalHandler() since terminal will send signals to both /// processes and so signals will be delivered to the /// clickhouse-client/local as well, which will be terminated when /// signal will be delivered second time. if (SIG_ERR == signal(SIGINT, SIG_IGN)) - throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGINT"); if (SIG_ERR == signal(SIGQUIT, SIG_IGN)) - throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGQUIT"); ShellCommand::Config config(pager); config.pipe_stdin_only = true; @@ -1306,11 +1306,11 @@ void ClientBase::resetOutput() pager_cmd->wait(); if (SIG_ERR == signal(SIGPIPE, SIG_DFL)) - throwFromErrno("Cannot set signal handler for SIIGPIEP.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGPIPE"); if (SIG_ERR == signal(SIGINT, SIG_DFL)) - throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGINT"); if (SIG_ERR == signal(SIGQUIT, SIG_DFL)) - throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler for SIGQUIT"); setupSignalHandler(); } diff --git a/src/Common/AlignedBuffer.cpp b/src/Common/AlignedBuffer.cpp index f1d3f98ff3a..5e11f16f106 100644 --- a/src/Common/AlignedBuffer.cpp +++ b/src/Common/AlignedBuffer.cpp @@ -18,9 +18,11 @@ void AlignedBuffer::alloc(size_t size, size_t alignment) void * new_buf; int res = ::posix_memalign(&new_buf, std::max(alignment, sizeof(void*)), size); if (0 != res) - throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign), size: {}, alignment: {}.", - ReadableSize(size), ReadableSize(alignment)), - ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + throw ErrnoException( + ErrorCodes::CANNOT_ALLOCATE_MEMORY, + "Cannot allocate memory 
(posix_memalign), size: {}, alignment: {}.", + ReadableSize(size), + ReadableSize(alignment)); buf = new_buf; } diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index a85274e1f69..269e23f3719 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -118,8 +118,11 @@ public: void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) { - DB::throwFromErrno( - fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw DB::ErrnoException( + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, + "Allocator: Cannot realloc from {} to {}", + ReadableSize(old_size), + ReadableSize(new_size)); } buf = new_buf; @@ -164,7 +167,7 @@ private: buf = ::malloc(size); if (nullptr == buf) - DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot malloc {}.", ReadableSize(size)); } else { @@ -172,8 +175,8 @@ private: int res = posix_memalign(&buf, alignment, size); if (0 != res) - DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), - DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + throw DB::ErrnoException( + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)); if constexpr (clear_memory) memset(buf, 0, size); diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index 79aeddb09df..3584ffd5ad4 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -179,13 +179,13 @@ private: { ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) - DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}.", ReadableSize(size)); } ~Chunk() { if (ptr && 0 != munmap(ptr, size)) - DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP); + throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot munmap {}.", ReadableSize(size)); } Chunk(Chunk && other) noexcept : ptr(other.ptr), size(other.size) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 36c87010fa5..9df6d22df04 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -797,7 +797,7 @@ void AsynchronousMetrics::update(TimePoint update_time) int64_t hz = sysconf(_SC_CLK_TCK); if (-1 == hz) - throwFromErrno("Cannot call 'sysconf' to obtain system HZ", ErrorCodes::CANNOT_SYSCONF); + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_after_previous_update).count() / 1e9); size_t num_cpus = 0; diff --git a/src/Common/CounterInFile.h b/src/Common/CounterInFile.h index fe3b74173f6..993ed97966a 100644 --- a/src/Common/CounterInFile.h +++ b/src/Common/CounterInFile.h @@ -69,13 +69,13 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0666); if (-1 == fd) - DB::throwFromErrnoWithPath("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_OPEN_FILE, path, "Cannot open file {}", path); try { int flock_ret = flock(fd, LOCK_EX); if (-1 
== flock_ret) - DB::throwFromErrnoWithPath("Cannot lock file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_OPEN_FILE, path, "Cannot lock file {}", path); if (!file_doesnt_exists) { @@ -145,7 +145,7 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0666); if (-1 == fd) - DB::throwFromErrnoWithPath("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_OPEN_FILE, path, "Cannot open file {}", path); try { diff --git a/src/Common/Epoll.cpp b/src/Common/Epoll.cpp index ac06f044beb..49c86222cf0 100644 --- a/src/Common/Epoll.cpp +++ b/src/Common/Epoll.cpp @@ -19,7 +19,7 @@ Epoll::Epoll() : events_count(0) { epoll_fd = epoll_create1(0); if (epoll_fd == -1) - throwFromErrno("Cannot open epoll descriptor", DB::ErrorCodes::EPOLL_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot open epoll descriptor"); } Epoll::Epoll(Epoll && other) noexcept : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) @@ -47,7 +47,7 @@ void Epoll::add(int fd, void * ptr, uint32_t events) ++events_count; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) - throwFromErrno("Cannot add new descriptor to epoll", DB::ErrorCodes::EPOLL_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot add new descriptor to epoll"); } void Epoll::remove(int fd) @@ -55,7 +55,7 @@ void Epoll::remove(int fd) --events_count; if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, nullptr) == -1) - throwFromErrno("Cannot remove descriptor from epoll", DB::ErrorCodes::EPOLL_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot remove descriptor from epoll"); } size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout) const @@ -82,7 +82,7 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout continue; } else - throwFromErrno("Error in epoll_wait", DB::ErrorCodes::EPOLL_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Error in epoll_wait"); } else break; diff --git a/src/Common/EventFD.cpp b/src/Common/EventFD.cpp index af50ca62271..9ec7f128420 100644 --- a/src/Common/EventFD.cpp +++ b/src/Common/EventFD.cpp @@ -21,7 +21,7 @@ EventFD::EventFD() { fd = eventfd(0 /* initval */, 0 /* flags */); if (fd == -1) - throwFromErrno("Cannot create eventfd", ErrorCodes::CANNOT_PIPE); + throw ErrnoException(ErrorCodes::CANNOT_PIPE, "Cannot create eventfd"); } uint64_t EventFD::read() const @@ -33,7 +33,7 @@ uint64_t EventFD::read() const break; if (errno != EINTR) - throwFromErrno("Cannot read from eventfd", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from eventfd"); } return buf; @@ -47,7 +47,7 @@ bool EventFD::write(uint64_t increase) const return false; if (errno != EINTR) - throwFromErrno("Cannot write to eventfd", ErrorCodes::CANNOT_WRITE_TO_SOCKET); + throw ErrnoException(ErrorCodes::CANNOT_WRITE_TO_SOCKET, "Cannot write to eventfd"); } return true; diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index ed9fb00241d..d5f1984a5ff 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -1,25 +1,24 @@ #include "Exception.h" #include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include #include -#include #include +#include +#include +#include #include -#include -#include -#include +#include #include +#include #include #include 
-#include -#include +#include +#include +#include #include @@ -212,17 +211,6 @@ Exception::FramePointers Exception::getStackFramePointers() const thread_local bool Exception::enable_job_stack_trace = false; thread_local std::vector Exception::thread_frame_pointers = {}; - -void throwFromErrno(const std::string & s, int code, int the_errno) -{ - throw ErrnoException(s + ", " + errnoToString(the_errno), code, the_errno); -} - -void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, int the_errno) -{ - throw ErrnoException(s + ", " + errnoToString(the_errno), code, the_errno, path); -} - static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message) { try diff --git a/src/Common/Exception.h b/src/Common/Exception.h index ac116f5ceca..9b2507794bb 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -7,9 +7,10 @@ #include #include +#include #include -#include #include +#include #include @@ -173,12 +174,61 @@ std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); -/// Contains an additional member `saved_errno`. See the throwFromErrno function. +/// Contains an additional member `saved_errno` class ErrnoException : public Exception { public: - ErrnoException(const std::string & msg, int code, int saved_errno_, const std::optional & path_ = {}) - : Exception(msg, code), saved_errno(saved_errno_), path(path_) {} + ErrnoException(std::string && msg, int code, int with_errno) : Exception(msg, code), saved_errno(with_errno) + { + capture_thread_frame_pointers = thread_frame_pointers; + addMessage(", {}", errnoToString(saved_errno)); + } + + /// Message must be a compile-time constant + template + requires std::is_convertible_v + ErrnoException(int code, T && message) : Exception(message, code), saved_errno(errno) + { + capture_thread_frame_pointers = thread_frame_pointers; + addMessage(", {}", errnoToString(saved_errno)); + } + + // Format message with fmt::format, like the logging functions. + template + ErrnoException(int code, FormatStringHelper fmt, Args &&... args) + : Exception(fmt::format(fmt.fmt_str, std::forward(args)...), code), saved_errno(errno) + { + capture_thread_frame_pointers = thread_frame_pointers; + message_format_string = fmt.message_format_string; + addMessage(", {}", errnoToString(saved_errno)); + } + + template + [[noreturn]] static void throwWithErrno(int code, int with_errno, FormatStringHelper fmt, Args &&... args) + { + auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward(args)...), code, with_errno); + e.message_format_string = fmt.message_format_string; + throw e; + } + + template + [[noreturn]] static void throwFromPath(int code, const std::string & path, FormatStringHelper fmt, Args &&... args) + { + auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward(args)...), code, errno); + e.message_format_string = fmt.message_format_string; + e.path = path; + throw e; + } + + template + [[noreturn]] static void + throwFromPathWithErrno(int code, const std::string & path, int with_errno, FormatStringHelper fmt, Args &&... 
args) + { + auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward(args)...), code, with_errno); + e.message_format_string = fmt.message_format_string; + e.path = path; + throw e; + } ErrnoException * clone() const override { return new ErrnoException(*this); } void rethrow() const override { throw *this; } // NOLINT @@ -188,7 +238,7 @@ public: private: int saved_errno; - std::optional path; + std::optional path{}; const char * name() const noexcept override { return "DB::ErrnoException"; } const char * className() const noexcept override { return "DB::ErrnoException"; } @@ -233,13 +283,6 @@ private: using Exceptions = std::vector; - -[[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno); -/// Useful to produce some extra information about available space and inodes on device -[[noreturn]] void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, - int the_errno = errno); - - /** Try to write an exception to the log (and forget about it). * Can be used in destructors in the catch-all block. */ diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 91bb632d807..067b0aa7a63 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -46,14 +46,14 @@ public: void * vp = ::mmap(nullptr, num_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == vp) - DB::throwFromErrno(fmt::format("FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)); /// TODO: make reports on illegal guard page access more clear. /// Currently we will see segfault and almost random stacktrace. if (-1 == ::mprotect(vp, page_size, PROT_NONE)) { ::munmap(vp, num_bytes); - DB::throwFromErrno("FiberStack: cannot protect guard page", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "FiberStack: cannot protect guard page"); } /// Do not count guard page in memory usage. 
diff --git a/src/Common/InterruptListener.h b/src/Common/InterruptListener.h index b8b2ba6be7d..1f0f021fb03 100644 --- a/src/Common/InterruptListener.h +++ b/src/Common/InterruptListener.h @@ -58,9 +58,8 @@ private: public: InterruptListener() : active(false) { - if (sigemptyset(&sig_set) - || sigaddset(&sig_set, SIGINT)) - throwFromErrno("Cannot manipulate with signal set.", ErrorCodes::CANNOT_MANIPULATE_SIGSET); + if (sigemptyset(&sig_set) || sigaddset(&sig_set, SIGINT)) + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Cannot manipulate with signal set"); block(); } @@ -82,7 +81,7 @@ public: if (errno == EAGAIN) return false; else - throwFromErrno("Cannot poll signal (sigtimedwait).", ErrorCodes::CANNOT_WAIT_FOR_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_WAIT_FOR_SIGNAL, "Cannot poll signal (sigtimedwait)"); } return true; @@ -93,7 +92,7 @@ public: if (!active) { if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) - throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_BLOCK_SIGNAL, "Cannot block signal"); active = true; } @@ -105,7 +104,7 @@ public: if (active) { if (pthread_sigmask(SIG_UNBLOCK, &sig_set, nullptr)) - throwFromErrno("Cannot unblock signal.", ErrorCodes::CANNOT_UNBLOCK_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_UNBLOCK_SIGNAL, "Cannot unblock signal"); active = false; } diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index f2d2ab5fea9..2092c679336 100644 --- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -39,7 +39,8 @@ MemoryStatisticsOS::MemoryStatisticsOS() fd = ::open(filename, O_RDONLY | O_CLOEXEC); if (-1 == fd) - throwFromErrno("Cannot open file " + std::string(filename), errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, filename, "Cannot open file {}", filename); } MemoryStatisticsOS::~MemoryStatisticsOS() @@ -48,9 +49,8 @@ MemoryStatisticsOS::~MemoryStatisticsOS() { try { - throwFromErrno( - "File descriptor for \"" + std::string(filename) + "\" could not be closed. " - "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_CLOSE_FILE, filename, "File descriptor for '{}' could not be closed", filename); } catch (const ErrnoException &) { @@ -77,7 +77,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const if (errno == EINTR) continue; - throwFromErrno("Cannot read from file " + std::string(filename), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, filename, "Cannot read from file {}", filename); } assert(res >= 0); @@ -136,7 +136,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const size_t len = sizeof(struct kinfo_proc); if (-1 == ::sysctl(mib, 4, &kp, &len, nullptr, 0)) - throwFromErrno("Cannot sysctl(kern.proc.pid." 
+ std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot sysctl(kern.proc.pid.{})", std::to_string(self)); if (sizeof(struct kinfo_proc) != len) throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel returns structure of {} bytes instead of expected {}", diff --git a/src/Common/NetlinkMetricsProvider.cpp b/src/Common/NetlinkMetricsProvider.cpp index 4c228bcc6fc..23173f31689 100644 --- a/src/Common/NetlinkMetricsProvider.cpp +++ b/src/Common/NetlinkMetricsProvider.cpp @@ -117,7 +117,7 @@ struct NetlinkMessage if (errno == EAGAIN) continue; else - throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Can't send a Netlink command"); } if (bytes_sent > request_size) @@ -255,7 +255,7 @@ NetlinkMetricsProvider::NetlinkMetricsProvider() { netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) - throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Can't create PF_NETLINK socket"); try { @@ -267,7 +267,7 @@ NetlinkMetricsProvider::NetlinkMetricsProvider() tv.tv_usec = 50000; if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) - throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Can't set timeout on PF_NETLINK socket"); union { @@ -277,7 +277,7 @@ NetlinkMetricsProvider::NetlinkMetricsProvider() addr.nl_family = AF_NETLINK; if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0) - throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Can't bind PF_NETLINK socket"); taskstats_family_id = getFamilyId(netlink_socket_fd); } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 68c1e325f0c..77cecf694f3 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -209,7 +209,7 @@ protected: { size_t length = right_rounded_down - left_rounded_up; if (0 != mprotect(left_rounded_up, length, prot)) - throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + throw ErrnoException(ErrorCodes::CANNOT_MPROTECT, "Cannot mprotect memory region"); } } diff --git a/src/Common/PipeFDs.cpp b/src/Common/PipeFDs.cpp index 21a9ae59972..f2a913467a9 100644 --- a/src/Common/PipeFDs.cpp +++ b/src/Common/PipeFDs.cpp @@ -29,14 +29,14 @@ void LazyPipeFDs::open() #ifndef OS_DARWIN if (0 != pipe2(fds_rw, O_CLOEXEC)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); + throw ErrnoException(ErrorCodes::CANNOT_PIPE, "Cannot create pipe"); #else if (0 != pipe(fds_rw)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE); + throw ErrnoException(ErrorCodes::CANNOT_PIPE, "Cannot create pipe"); if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC)) - throwFromErrno("Cannot setup auto-close on exec for read end of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot setup auto-close on exec for read end of pipe"); if (0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC)) - throwFromErrno("Cannot setup auto-close on exec for write end of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot setup auto-close on exec for write end of pipe"); #endif } @@ -47,7 +47,7 @@ void LazyPipeFDs::close() if (fd < 0) continue; if (0 != ::close(fd)) - throwFromErrno("Cannot close pipe", 
ErrorCodes::CANNOT_PIPE); + throw ErrnoException(ErrorCodes::CANNOT_PIPE, "Cannot close pipe"); fd = -1; } } @@ -74,18 +74,18 @@ void LazyPipeFDs::setNonBlockingWrite() { int flags = fcntl(fds_rw[1], F_GETFL, 0); if (-1 == flags) - throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot get file status flags of pipe"); if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK)) - throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set non-blocking mode of pipe"); } void LazyPipeFDs::setNonBlockingRead() { int flags = fcntl(fds_rw[0], F_GETFL, 0); if (-1 == flags) - throwFromErrno("Cannot get file status flags of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot get file status flags of pipe"); if (-1 == fcntl(fds_rw[0], F_SETFL, flags | O_NONBLOCK)) - throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set non-blocking mode of pipe"); } void LazyPipeFDs::setNonBlockingReadWrite() @@ -110,13 +110,13 @@ void LazyPipeFDs::tryIncreaseSize(int desired_size) /// It will work nevertheless. } else - throwFromErrno("Cannot get pipe capacity", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot get pipe capacity"); } else { for (errno = 0; errno != EPERM && pipe_size < desired_size; pipe_size *= 2) if (-1 == fcntl(fds_rw[1], F_SETPIPE_SZ, pipe_size * 2) && errno != EPERM) - throwFromErrno("Cannot increase pipe capacity to " + std::to_string(pipe_size * 2), ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot increase pipe capacity to {}", pipe_size * 2); LOG_TRACE(log, "Pipe capacity is {}", ReadableSize(std::min(pipe_size, desired_size))); } diff --git a/src/Common/ProcfsMetricsProvider.cpp b/src/Common/ProcfsMetricsProvider.cpp index 7a94cecee5e..194053cd271 100644 --- a/src/Common/ProcfsMetricsProvider.cpp +++ b/src/Common/ProcfsMetricsProvider.cpp @@ -37,18 +37,15 @@ namespace { [[noreturn]] inline void throwWithFailedToOpenFile(const std::string & filename) { - throwFromErrno( - "Cannot open file " + filename, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, filename, "Cannot open file {}", filename); } inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename) { try { - throwFromErrno( - "File descriptor for \"" + filename + "\" could not be closed. " - "Something seems to have gone wrong. 
Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_CLOSE_FILE, filename, "File descriptor for {} could not be closed", filename); } catch (const ErrnoException &) { @@ -69,9 +66,7 @@ ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_s if (errno == EINTR) continue; - throwFromErrno( - "Cannot read from file " + std::string(filename), - ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, filename, "Cannot read from file {}", filename); } assert(res >= 0); diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index c656e7f992f..7398c62a882 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -141,7 +141,7 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal /// Also, it cannot be created if the server has too many threads. - throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER); + throw ErrnoException(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create thread timer"); } timer_id.emplace(local_timer_id); CurrentMetrics::add(CurrentMetrics::CreatedTimersInQueryProfiler); @@ -164,7 +164,7 @@ void Timer::set(UInt32 period) struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset}; if (timer_settime(*timer_id, 0, &timer_spec, nullptr)) - throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD); + throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Failed to set thread timer period"); CurrentMetrics::add(CurrentMetrics::ActiveTimersInQueryProfiler); } @@ -238,13 +238,13 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t sa.sa_flags = SA_SIGINFO | SA_RESTART; if (sigemptyset(&sa.sa_mask)) - throwFromErrno("Failed to clean signal mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET); + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to clean signal mask for query profiler"); if (sigaddset(&sa.sa_mask, pause_signal)) - throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET); + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to add signal to mask for query profiler"); if (sigaction(pause_signal, &sa, nullptr)) - throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to setup signal handler for query profiler"); try { diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 5550b68c824..f4efc9e3526 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -145,7 +145,7 @@ std::unique_ptr ShellCommand::executeImpl( #endif if (!real_vfork) - throwFromErrno("Cannot find symbol vfork in myself", ErrorCodes::CANNOT_DLSYM); + throw ErrnoException(ErrorCodes::CANNOT_DLSYM, "Cannot find symbol vfork in myself"); PipeFDs pipe_stdin; PipeFDs pipe_stdout; @@ -163,7 +163,7 @@ std::unique_ptr ShellCommand::executeImpl( pid_t pid = reinterpret_cast(real_vfork)(); if (pid == -1) - throwFromErrno("Cannot vfork", ErrorCodes::CANNOT_FORK); + throw ErrnoException(ErrorCodes::CANNOT_FORK, "Cannot vfork"); if (0 == pid) { @@ -305,7 +305,7 @@ int ShellCommand::tryWait() while (waitpid(pid, &status, 0) < 0) { if (errno != EINTR) - throwFromErrno("Cannot waitpid", ErrorCodes::CANNOT_WAITPID); + throw ErrnoException(ErrorCodes::CANNOT_WAITPID, 
"Cannot waitpid"); } LOG_TRACE(getLogger(), "Wait for shell command pid {} completed with status {}", pid, status); diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index a9ffce7ddf8..0a9aa2f2739 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -64,7 +64,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) fd = ::open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0666); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, path, "Cannot open file {}", path); try { @@ -74,14 +74,14 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) if (errno == EWOULDBLOCK) throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot lock file {}. Another server instance in same directory is already running.", path); else - throwFromErrnoWithPath("Cannot lock file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, path, "Cannot lock file {}", path); } if (0 != ftruncate(fd, 0)) - throwFromErrnoWithPath("Cannot ftruncate " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_TRUNCATE_FILE, path, "Cannot ftruncate file {}", path); if (0 != lseek(fd, 0, SEEK_SET)) - throwFromErrnoWithPath("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, path, "Cannot lseek file {}", path); /// Write information about current server instance to the file. WriteBufferFromFileDescriptor out(fd, 1024); diff --git a/src/Common/TerminalSize.cpp b/src/Common/TerminalSize.cpp index c53494fe9a0..bc5b4474384 100644 --- a/src/Common/TerminalSize.cpp +++ b/src/Common/TerminalSize.cpp @@ -19,12 +19,12 @@ uint16_t getTerminalWidth() if (isatty(STDIN_FILENO)) { if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size)) - DB::throwFromErrno("Cannot obtain terminal window size (ioctl TIOCGWINSZ)", DB::ErrorCodes::SYSTEM_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } else if (isatty(STDERR_FILENO)) { if (ioctl(STDERR_FILENO, TIOCGWINSZ, &terminal_size)) - DB::throwFromErrno("Cannot obtain terminal window size (ioctl TIOCGWINSZ)", DB::ErrorCodes::SYSTEM_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } /// Default - 0. 
return terminal_size.ws_col;
diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp
index 433b8a76dba..50e2d81047d 100644
--- a/src/Common/ThreadFuzzer.cpp
+++ b/src/Common/ThreadFuzzer.cpp
@@ -258,10 +258,10 @@ void ThreadFuzzer::setup() const
 #if defined(OS_LINUX)
 if (sigemptyset(&sa.sa_mask))
- throwFromErrno("Failed to clean signal mask for thread fuzzer", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
+ throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to clean signal mask for thread fuzzer");
 if (sigaddset(&sa.sa_mask, SIGPROF))
- throwFromErrno("Failed to add signal to mask for thread fuzzer", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
+ throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to add signal to mask for thread fuzzer");
 #else
 // the two following functions always return 0 under mac
 sigemptyset(&sa.sa_mask);
@@ -269,7 +269,7 @@ void ThreadFuzzer::setup() const
 #endif
 if (sigaction(SIGPROF, &sa, nullptr))
- throwFromErrno("Failed to setup signal handler for thread fuzzer", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
+ throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Failed to setup signal handler for thread fuzzer");
 static constexpr UInt32 timer_precision = 1000000;
@@ -280,7 +280,7 @@ void ThreadFuzzer::setup() const
 struct itimerval timer = {.it_interval = interval, .it_value = interval};
 if (0 != setitimer(ITIMER_PROF, &timer, nullptr))
- throwFromErrno("Failed to create profiling timer", ErrorCodes::CANNOT_CREATE_TIMER);
+ throw ErrnoException(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create profiling timer");
 }
diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp
index 2fb9618b60a..248febe226e 100644
--- a/src/Common/TimerDescriptor.cpp
+++ b/src/Common/TimerDescriptor.cpp
@@ -24,7 +24,7 @@ TimerDescriptor::TimerDescriptor(int clockid, int flags)
 throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor");
 if (-1 == fcntl(timer_fd, F_SETFL, O_NONBLOCK))
- throwFromErrno("Cannot set O_NONBLOCK for timer_fd", ErrorCodes::CANNOT_FCNTL);
+ throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set O_NONBLOCK for timer_fd");
 }
 TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(other.timer_fd)
@@ -57,7 +57,7 @@ void TimerDescriptor::reset() const
 spec.it_value.tv_nsec = 0;
 if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr))
- throwFromErrno("Cannot reset timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
+ throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Cannot reset timer_fd");
 /// Drain socket.
 /// It may be possible that alarm happened and socket is readable.
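The change applied throughout these hunks follows one pattern: throwFromErrno(message, code) took a pre-formatted message first and the error code second, while ErrnoException takes the error code first and a fmt-style format string with its arguments, and evidently still captures errno itself (the ThreadPoolReader hunk later in this patch drops its explicit errnoToString() call for that reason). A minimal before/after illustration, taken from the LazyPipeFDs call site in PipeFDs.cpp above and assuming only the constructor shape visible in this patch:

    // Old API: hand-assembled message, error code last, errno read inside throwFromErrno().
    if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK))
        throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL);

    // New API: error code first, then a format string; errno is captured by the exception itself.
    if (-1 == fcntl(fds_rw[1], F_SETFL, flags | O_NONBLOCK))
        throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set non-blocking mode of pipe");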
@@ -78,7 +78,7 @@ void TimerDescriptor::drain() const break; if (errno != EINTR) - throwFromErrno("Cannot drain timer_fd", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd"); } } } @@ -94,7 +94,7 @@ void TimerDescriptor::setRelative(uint64_t usec) const spec.it_value.tv_nsec = (usec % TIMER_PRECISION) * 1'000; if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr)) - throwFromErrno("Cannot set time for timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD); + throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Cannot set time for timer_fd"); } void TimerDescriptor::setRelative(Poco::Timespan timespan) const diff --git a/src/Common/assertProcessUserMatchesDataOwner.cpp b/src/Common/assertProcessUserMatchesDataOwner.cpp index f2557a4aaaf..a2f87825257 100644 --- a/src/Common/assertProcessUserMatchesDataOwner.cpp +++ b/src/Common/assertProcessUserMatchesDataOwner.cpp @@ -31,7 +31,8 @@ namespace const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); if (error) - throwFromErrno("Failed to find user name for " + std::to_string(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); + ErrnoException::throwWithErrno( + ErrorCodes::FAILED_TO_GETPWUID, error, "Failed to find user name for {}", std::to_string(user_id)); else if (result) return result->pw_name; return std::to_string(user_id); diff --git a/src/Common/atomicRename.cpp b/src/Common/atomicRename.cpp index 69e077e38f5..44e02995858 100644 --- a/src/Common/atomicRename.cpp +++ b/src/Common/atomicRename.cpp @@ -87,10 +87,12 @@ static bool renameat2(const std::string & old_path, const std::string & new_path return false; if (errno == EEXIST) - throwFromErrno(fmt::format("Cannot rename {} to {} because the second path already exists", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL); + throw ErrnoException( + ErrorCodes::ATOMIC_RENAME_FAIL, "Cannot rename {} to {} because the second path already exists", old_path, new_path); if (errno == ENOENT) - throwFromErrno(fmt::format("Paths cannot be exchanged because {} or {} does not exist", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL); - throwFromErrnoWithPath(fmt::format("Cannot rename {} to {}", old_path, new_path), new_path, ErrorCodes::SYSTEM_ERROR); + throw ErrnoException( + ErrorCodes::ATOMIC_RENAME_FAIL, "Paths cannot be exchanged because {} or {} does not exist", old_path, new_path); + ErrnoException::throwFromPath(ErrorCodes::SYSTEM_ERROR, new_path, "Cannot rename {} to {}", old_path, new_path); } bool supportsAtomicRename() @@ -139,11 +141,12 @@ static bool renameat2(const std::string & old_path, const std::string & new_path if (errnum == ENOTSUP || errnum == EINVAL) return false; if (errnum == EEXIST) - throwFromErrno(fmt::format("Cannot rename {} to {} because the second path already exists", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL); + throw ErrnoException( + ErrorCodes::ATOMIC_RENAME_FAIL, "Cannot rename {} to {} because the second path already exists", old_path, new_path); if (errnum == ENOENT) - throwFromErrno(fmt::format("Paths cannot be exchanged because {} or {} does not exist", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL); - throwFromErrnoWithPath( - fmt::format("Cannot rename {} to {}: {}", old_path, new_path, strerror(errnum)), new_path, ErrorCodes::SYSTEM_ERROR); + throw ErrnoException( + ErrorCodes::ATOMIC_RENAME_FAIL, "Paths cannot be exchanged because {} or {} does not exist", old_path, new_path); + 
ErrnoException::throwFromPath(ErrorCodes::SYSTEM_ERROR, new_path, "Cannot rename {} to {}", old_path, new_path); } diff --git a/src/Common/checkStackSize.cpp b/src/Common/checkStackSize.cpp index 5ab8d124fe4..8c2a0aaed7f 100644 --- a/src/Common/checkStackSize.cpp +++ b/src/Common/checkStackSize.cpp @@ -54,7 +54,7 @@ static size_t getStackSize(void ** out_address) # if defined(OS_FREEBSD) || defined(OS_SUNOS) pthread_attr_init(&attr); if (0 != pthread_attr_get_np(pthread_self(), &attr)) - throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + throw ErrnoException(ErrorCodes::CANNOT_PTHREAD_ATTR, "Cannot pthread_attr_get_np"); # else if (0 != pthread_getattr_np(pthread_self(), &attr)) { @@ -64,14 +64,14 @@ static size_t getStackSize(void ** out_address) return 0; } else - throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + throw ErrnoException(ErrorCodes::CANNOT_PTHREAD_ATTR, "Cannot pthread_getattr_np"); } # endif SCOPE_EXIT({ pthread_attr_destroy(&attr); }); if (0 != pthread_attr_getstack(&attr, &address, &size)) - throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + throw ErrnoException(ErrorCodes::CANNOT_PTHREAD_ATTR, "Cannot pthread_attr_getstack"); #ifdef USE_MUSL /// Adjust stack size for the main thread under musl. diff --git a/src/Common/createHardLink.cpp b/src/Common/createHardLink.cpp index 238851d7f01..dcaf5e0fb10 100644 --- a/src/Common/createHardLink.cpp +++ b/src/Common/createHardLink.cpp @@ -26,19 +26,21 @@ void createHardLink(const String & source_path, const String & destination_path) struct stat destination_descr; if (0 != lstat(source_path.c_str(), &source_descr)) - throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_STAT, source_path, "Cannot stat {}", source_path); if (0 != lstat(destination_path.c_str(), &destination_descr)) - throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_STAT, destination_path, "Cannot stat {}", destination_path); if (source_descr.st_ino != destination_descr.st_ino) - throwFromErrnoWithPath( - "Destination file " + destination_path + " is already exist and have different inode.", - destination_path, ErrorCodes::CANNOT_LINK, link_errno); + ErrnoException::throwFromPathWithErrno( + ErrorCodes::CANNOT_STAT, + destination_path, + link_errno, + "Destination file {} already exists and has a different inode", + destination_path); } else - throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path, - ErrorCodes::CANNOT_LINK); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_LINK, destination_path, "Cannot link {} to {}", source_path, destination_path); } } diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index 3f1b3e88328..63c2f231261 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -248,7 +248,7 @@ int main(int argc, char ** argv) rusage resource_usage; if (0 != getrusage(RUSAGE_SELF, &resource_usage)) - throwFromErrno("Cannot getrusage", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot getrusage"); size_t allocated_bytes = resource_usage.ru_maxrss * 1024; std::cerr << "Current memory usage: " << allocated_bytes << " bytes.\n"; diff --git a/src/Common/examples/thread_creation_latency.cpp 
b/src/Common/examples/thread_creation_latency.cpp index 60fb27dc345..d511cab9a0e 100644 --- a/src/Common/examples/thread_creation_latency.cpp +++ b/src/Common/examples/thread_creation_latency.cpp @@ -82,9 +82,9 @@ int main(int argc, char ** argv) { pthread_t thread; if (pthread_create(&thread, nullptr, g, nullptr)) - DB::throwFromErrno("Cannot create thread.", DB::ErrorCodes::PTHREAD_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::PTHREAD_ERROR, "Cannot create thread"); if (pthread_join(thread, nullptr)) - DB::throwFromErrno("Cannot join thread.", DB::ErrorCodes::PTHREAD_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::PTHREAD_ERROR, "Cannot join thread"); }); test(n, "Create and destroy std::thread each iteration", [] diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index eabc7bdafbb..0d3b5cb83c8 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -49,7 +49,7 @@ struct statvfs getStatVFS(const String & path) { if (errno == EINTR) continue; - throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_STATVFS, path, "Could not calculate available disk space (statvfs)"); } return fs; } @@ -79,7 +79,7 @@ String getBlockDeviceId([[maybe_unused]] const String & path) #if defined(OS_LINUX) struct stat sb; if (lstat(path.c_str(), &sb)) - throwFromErrnoWithPath("Cannot lstat " + path, path, ErrorCodes::CANNOT_STAT); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_STAT, path, "Cannot lstat {}", path); WriteBufferFromOwnString ss; ss << major(sb.st_dev) << ":" << minor(sb.st_dev); return ss.str(); @@ -164,7 +164,7 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path) { struct stat st; if (stat(p.c_str(), &st)) /// NOTE: man stat does not list EINTR as possible error - throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::SYSTEM_ERROR, p.string(), "Cannot stat {}", p.string()); return st.st_dev; }; @@ -250,10 +250,8 @@ size_t getSizeFromFileDescriptor(int fd, const String & file_name) int res = fstat(fd, &buf); if (-1 == res) { - throwFromErrnoWithPath( - "Cannot execute fstat" + (file_name.empty() ? "" : " file: " + file_name), - file_name, - ErrorCodes::CANNOT_FSTAT); + DB::ErrnoException::throwFromPath( + DB::ErrorCodes::CANNOT_FSTAT, file_name, "Cannot execute fstat{}", file_name.empty() ? 
"" : " file: " + file_name); } return buf.st_size; } @@ -263,10 +261,7 @@ Int64 getINodeNumberFromPath(const String & path) struct stat file_stat; if (stat(path.data(), &file_stat)) { - throwFromErrnoWithPath( - "Cannot execute stat for file " + path, - path, - ErrorCodes::CANNOT_STAT); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_STAT, path, "Cannot execute stat for file {}", path); } return file_stat.st_ino; } @@ -302,7 +297,7 @@ bool createFile(const std::string & path) close(n); return true; } - DB::throwFromErrnoWithPath("Cannot create file: " + path, path, DB::ErrorCodes::CANNOT_CREATE_FILE); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_CREATE_FILE, path, "Cannot create file: {}", path); } bool exists(const std::string & path) @@ -317,7 +312,7 @@ bool canRead(const std::string & path) return true; if (errno == EACCES) return false; - DB::throwFromErrnoWithPath("Cannot check read access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::PATH_ACCESS_DENIED, path, "Cannot check read access to file: {}", path); } bool canWrite(const std::string & path) @@ -327,7 +322,7 @@ bool canWrite(const std::string & path) return true; if (errno == EACCES) return false; - DB::throwFromErrnoWithPath("Cannot check write access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::PATH_ACCESS_DENIED, path, "Cannot check write access to file: {}", path); } bool canExecute(const std::string & path) @@ -337,7 +332,7 @@ bool canExecute(const std::string & path) return true; if (errno == EACCES) return false; - DB::throwFromErrnoWithPath("Cannot check write access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::PATH_ACCESS_DENIED, path, "Cannot check execute access to file: {}", path); } time_t getModificationTime(const std::string & path) @@ -369,7 +364,7 @@ void setModificationTime(const std::string & path, time_t time) tb.actime = time; tb.modtime = time; if (utime(path.c_str(), &tb) != 0) - DB::throwFromErrnoWithPath("Cannot set modification time for file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::PATH_ACCESS_DENIED, path, "Cannot set modification time to file: {}", path); } bool isSymlink(const fs::path & path) diff --git a/src/Common/hasLinuxCapability.cpp b/src/Common/hasLinuxCapability.cpp index 5d823b4ecaf..bf236eb5c56 100644 --- a/src/Common/hasLinuxCapability.cpp +++ b/src/Common/hasLinuxCapability.cpp @@ -27,7 +27,7 @@ static __user_cap_data_struct getCapabilities() /// Avoid dependency on 'libcap'. 
if (0 != syscall(SYS_capget, &request, &response)) - throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); + throw ErrnoException(ErrorCodes::NETLINK_ERROR, "Cannot do 'capget' syscall"); return response; } diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index ac6daf620d0..399de9d89a0 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -28,9 +28,7 @@ struct NetworkInterfaces : public boost::noncopyable NetworkInterfaces() { if (getifaddrs(&ifaddr) == -1) - { - throwFromErrno("Cannot getifaddrs", ErrorCodes::SYSTEM_ERROR); - } + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot getifaddrs"); } bool hasAddress(const Poco::Net::IPAddress & address) const diff --git a/src/Common/randomSeed.cpp b/src/Common/randomSeed.cpp index e10ef87283f..e9616abf7ca 100644 --- a/src/Common/randomSeed.cpp +++ b/src/Common/randomSeed.cpp @@ -24,7 +24,7 @@ DB::UInt64 randomSeed() { struct timespec times; if (clock_gettime(CLOCK_MONOTONIC, ×)) - DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_CLOCK_GETTIME, "Cannot clock_gettime"); /// Not cryptographically secure as time, pid and stack address can be predictable. diff --git a/src/Common/remapExecutable.cpp b/src/Common/remapExecutable.cpp index 206314ea295..911447d3adc 100644 --- a/src/Common/remapExecutable.cpp +++ b/src/Common/remapExecutable.cpp @@ -120,7 +120,7 @@ __attribute__((__noinline__)) void remapToHugeStep1(void * begin, size_t size) void * scratch = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == scratch) - throwFromErrno(fmt::format("Cannot mmap {} bytes", size), ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw ErrnoException(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot mmap {} bytes", size); memcpy(scratch, begin, size); diff --git a/src/Common/setThreadName.cpp b/src/Common/setThreadName.cpp index f90398825af..e14abb247f3 100644 --- a/src/Common/setThreadName.cpp +++ b/src/Common/setThreadName.cpp @@ -44,7 +44,7 @@ void setThreadName(const char * name) if (0 != prctl(PR_SET_NAME, name, 0, 0, 0)) #endif if (errno != ENOSYS && errno != EPERM) /// It's ok if the syscall is unsupported or not allowed in some environments. - DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::PTHREAD_ERROR, "Cannot set thread name with prctl(PR_SET_NAME, ...)"); memcpy(thread_name, name, std::min(1 + strlen(name), THREAD_NAME_SIZE - 1)); } @@ -64,7 +64,7 @@ const char * getThreadName() #else if (0 != prctl(PR_GET_NAME, thread_name, 0, 0, 0)) if (errno != ENOSYS && errno != EPERM) /// It's ok if the syscall is unsupported or not allowed in some environments. 
- DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR); + throw DB::ErrnoException(DB::ErrorCodes::PTHREAD_ERROR, "Cannot get thread name with prctl(PR_GET_NAME)"); #endif return thread_name; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d66bdf3583f..44a47de6918 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -92,10 +92,10 @@ PipeFDs signal_pipe; static void call_default_signal_handler(int sig) { if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); if (0 != raise(sig)) - throwFromErrno("Cannot send signal.", ErrorCodes::CANNOT_SEND_SIGNAL); + throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal"); } static const size_t signal_pipe_buf_size = @@ -659,7 +659,17 @@ BaseDaemon::~BaseDaemon() /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. for (int sig : handled_signals) if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + { + try + { + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); + } + catch (ErrnoException &) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + signal_pipe.close(); } @@ -1129,7 +1139,7 @@ void BaseDaemon::setupWatchdog() pid = fork(); if (-1 == pid) - throwFromErrno("Cannot fork", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot fork"); if (0 == pid) { @@ -1225,7 +1235,7 @@ void BaseDaemon::setupWatchdog() if (SIG_ERR == signal(sig, SIG_IGN)) { char * signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) - throwFromErrno(fmt::format("Cannot ignore {}", signal_description), ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot ignore {}", signal_description); } } } @@ -1315,7 +1325,7 @@ void systemdNotify(const std::string_view & command) int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (s == -1) - throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Can't create UNIX socket for systemd notify"); SCOPE_EXIT({ close(s); }); @@ -1351,7 +1361,7 @@ void systemdNotify(const std::string_view & command) if (errno == EINTR) continue; else - throwFromErrno("Failed to notify systemd, sendto returned error.", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Failed to notify systemd, sendto returned error"); } else sent_bytes_total += sent_bytes; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f5f30d0e977..12b0dc07799 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -675,8 +675,11 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( if (errno == ENOENT && !throw_on_error) return nullptr; - throwFromErrnoWithPath("Cannot open file " + metadata_file_path, metadata_file_path, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, + metadata_file_path, + "Cannot open file {}", + metadata_file_path); } ReadBufferFromFile in(metadata_file_fd, metadata_file_path, METADATA_FILE_BUFFER_SIZE); diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index 6c98ce9c180..68f727c019c 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -481,7 +481,7 @@ public: if (file.fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrnoWithPath("Cannot open file " + file_path, file_path, error_code); + ErrnoException::throwFromPath(error_code, file_path, "Cannot open file {}", file_path); } allocateSizeForNextPartition(); @@ -490,7 +490,8 @@ public: void allocateSizeForNextPartition() { if (preallocateDiskSpace(file.fd, current_blocks_size * block_size, block_size * file_blocks_size) < 0) - throwFromErrnoWithPath("Cannot preallocate space for the file " + file_path, file_path, ErrorCodes::CANNOT_ALLOCATE_MEMORY); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_ALLOCATE_MEMORY, file_path, "Cannot preallocate space for the file {}", file_path); current_blocks_size += file_blocks_size; } @@ -552,11 +553,11 @@ public: Stopwatch watch; #if defined(OS_DARWIN) if (::fsync(file.fd) < 0) - throwFromErrnoWithPath("Cannot fsync " + file_path, file_path, ErrorCodes::CANNOT_FSYNC); - #else + ErrnoException::throwFromPath(ErrorCodes::CANNOT_FSYNC, file_path, "Cannot fsync {}", file_path); +# else if (::fdatasync(file.fd) < 0) - throwFromErrnoWithPath("Cannot fdatasync " + file_path, file_path, ErrorCodes::CANNOT_FSYNC); - #endif + ErrnoException::throwFromPath(ErrorCodes::CANNOT_FSYNC, file_path, "Cannot fdatasync {}", file_path); +# endif ProfileEvents::increment(ProfileEvents::FileSyncElapsedMicroseconds, watch.elapsedMicroseconds()); current_block_index += buffer_size_in_blocks; @@ -598,13 +599,13 @@ public: while (io_submit(aio_context.ctx, 1, &request_ptr) != 1) { if (errno != EINTR) - throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + throw ErrnoException(ErrorCodes::CANNOT_IO_SUBMIT, "io_submit: Failed to submit a request for asynchronous IO"); } while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) != 1) { if (errno != EINTR) - throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + throw ErrnoException(ErrorCodes::CANNOT_IO_GETEVENTS, "io_getevents: Failed to get an event for asynchronous IO"); } auto read_bytes = eventResult(event); @@ -692,7 +693,7 @@ public: while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) <= 0) { if (errno != EINTR) - throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + throw ErrnoException(ErrorCodes::CANNOT_IO_GETEVENTS, "io_getevents: Failed to get an event for asynchronous IO"); } for (size_t i = to_pop; i < to_pop + popped; ++i) @@ -743,7 +744,7 @@ public: while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) <= 0) { if (errno != EINTR) - throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + throw ErrnoException(ErrorCodes::CANNOT_IO_SUBMIT, "io_submit: Failed to submit a request for asynchronous IO"); } to_push += 
pushed; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 394c08e4876..b1f55e96967 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -359,21 +359,21 @@ void DiskLocal::removeFile(const String & path) { auto fs_path = fs::path(disk_path) / path; if (0 != unlink(fs_path.c_str())) - throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, fs_path, "Cannot unlink file {}", fs_path); } void DiskLocal::removeFileIfExists(const String & path) { auto fs_path = fs::path(disk_path) / path; if (0 != unlink(fs_path.c_str()) && errno != ENOENT) - throwFromErrnoWithPath("Cannot unlink file " + fs_path.string(), fs_path, ErrorCodes::CANNOT_UNLINK); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, fs_path, "Cannot unlink file {}", fs_path); } void DiskLocal::removeDirectory(const String & path) { auto fs_path = fs::path(disk_path) / path; if (0 != rmdir(fs_path.c_str())) - throwFromErrnoWithPath("Cannot rmdir " + fs_path.string(), fs_path, ErrorCodes::CANNOT_RMDIR); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot rmdir {}", fs_path); } void DiskLocal::removeRecursive(const String & path) @@ -412,7 +412,7 @@ void DiskLocal::truncateFile(const String & path, size_t size) { int res = truncate((fs::path(disk_path) / path).string().data(), size); if (-1 == res) - throwFromErrnoWithPath("Cannot truncate file " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_TRUNCATE_FILE, path, "Cannot truncate {}", path); } void DiskLocal::createFile(const String & path) @@ -709,7 +709,7 @@ struct stat DiskLocal::stat(const String & path) const auto full_path = fs::path(disk_path) / path; if (::stat(full_path.string().c_str(), &st) == 0) return st; - DB::throwFromErrnoWithPath("Cannot stat file: " + path, path, DB::ErrorCodes::CANNOT_STAT); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::CANNOT_STAT, path, "Cannot stat file: {}", path); } void DiskLocal::chmod(const String & path, mode_t mode) @@ -717,7 +717,7 @@ void DiskLocal::chmod(const String & path, mode_t mode) auto full_path = fs::path(disk_path) / path; if (::chmod(full_path.string().c_str(), mode) == 0) return; - DB::throwFromErrnoWithPath("Cannot chmod file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED); + DB::ErrnoException::throwFromPath(DB::ErrorCodes::PATH_ACCESS_DENIED, path, "Cannot chmod file: {}", path); } void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 0e11b42ab01..4c9f665093d 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -77,7 +77,7 @@ IOUringReader::IOUringReader(uint32_t entries_) int ret = io_uring_queue_init_params(entries_, &ring, ¶ms); if (ret < 0) - throwFromErrno("Failed initializing io_uring", ErrorCodes::IO_URING_INIT_FAILED, -ret); + ErrnoException::throwWithErrno(ErrorCodes::IO_URING_INIT_FAILED, -ret, "Failed initializing io_uring"); cq_entries = params.cq_entries; ring_completion_monitor = std::make_unique([this] { monitorRing(); }); diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index 2ffae9b3338..bb295643726 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -175,9 +175,8 @@ std::future ThreadPoolReader::submit(Request reques else { 
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - promise.set_exception(std::make_exception_ptr(ErrnoException( - fmt::format("Cannot read from file {}, {}", fd, errnoToString()), - ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, errno))); + promise.set_exception(std::make_exception_ptr( + ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot read from file {}", fd))); return future; } } @@ -233,7 +232,7 @@ std::future ThreadPoolReader::submit(Request reques if (-1 == res && errno != EINTR) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrno(fmt::format("Cannot read from file {}", fd), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot read from file {}", fd); } bytes_read += res; diff --git a/src/Disks/IO/WriteBufferFromTemporaryFile.cpp b/src/Disks/IO/WriteBufferFromTemporaryFile.cpp index 03713adef02..c47fe281176 100644 --- a/src/Disks/IO/WriteBufferFromTemporaryFile.cpp +++ b/src/Disks/IO/WriteBufferFromTemporaryFile.cpp @@ -29,8 +29,7 @@ public: off_t res = lseek(fd, 0, SEEK_SET); if (-1 == res) - throwFromErrnoWithPath("Cannot reread temporary file " + file_name, file_name, - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, file_name, "Cannot reread temporary file {}", file_name); return std::make_unique(fd, file_name, std::move(origin->tmp_file)); } diff --git a/src/Disks/LocalDirectorySyncGuard.cpp b/src/Disks/LocalDirectorySyncGuard.cpp index 1ac57df63fb..6aff40cd47d 100644 --- a/src/Disks/LocalDirectorySyncGuard.cpp +++ b/src/Disks/LocalDirectorySyncGuard.cpp @@ -31,8 +31,8 @@ LocalDirectorySyncGuard::LocalDirectorySyncGuard(const String & full_path) : fd(::open(full_path.c_str(), O_DIRECTORY)) { if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + full_path, full_path, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, full_path, "Cannot open file {}", full_path); } LocalDirectorySyncGuard::~LocalDirectorySyncGuard() @@ -45,7 +45,7 @@ LocalDirectorySyncGuard::~LocalDirectorySyncGuard() #if defined(OS_DARWIN) if (fcntl(fd, F_FULLFSYNC, 0)) - throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC); + throw ErrnoException(ErrorCodes::CANNOT_FSYNC, "Cannot fcntl(F_FULLFSYNC)"); #else if (-1 == ::fdatasync(fd)) throw Exception(ErrorCodes::CANNOT_FSYNC, "Cannot fdatasync"); diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 4cf3c23d5a6..c20a27e2384 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -141,7 +141,7 @@ void LocalObjectStorage::removeObject(const StoredObject & object) return; if (0 != unlink(object.remote_path.data())) - throwFromErrnoWithPath("Cannot unlink file " + object.remote_path, object.remote_path, ErrorCodes::CANNOT_UNLINK); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, object.remote_path, "Cannot unlink file {}", object.remote_path); } void LocalObjectStorage::removeObjects(const StoredObjects & objects) diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index 349b8c71145..0f1e8a04236 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -30,7 +30,7 @@ Field nowSubsecond(UInt32 scale) timespec spec{}; if (clock_gettime(CLOCK_REALTIME, &spec)) - throwFromErrno("Cannot clock_gettime.", ErrorCodes::CANNOT_CLOCK_GETTIME); + throw ErrnoException(ErrorCodes::CANNOT_CLOCK_GETTIME, "Cannot clock_gettime"); DecimalUtils::DecimalComponents components{spec.tv_sec, spec.tv_nsec}; diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 6260056ef31..99430f039a4 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -165,7 +165,7 @@ public: std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(thread_local_rng)); void * map = mmap(hint, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == map) - throwFromErrno("Allocator: Cannot mmap", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw ErrnoException(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap"); maps.push_back(map); } } diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index 7088be633e5..abad8a0727d 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -46,7 +46,7 @@ AIOContext::AIOContext(unsigned int nr_events) { ctx = 0; if (io_setup(nr_events, &ctx) < 0) - DB::throwFromErrno("io_setup failed", DB::ErrorCodes::CANNOT_IOSETUP); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); } AIOContext::~AIOContext() @@ -137,7 +137,7 @@ AIOContext::AIOContext(unsigned int) { ctx = io_setup(); if (ctx < 0) - DB::throwFromErrno("io_setup failed", DB::ErrorCodes::CANNOT_IOSETUP); + throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); } AIOContext::~AIOContext() diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index 0e6c8090cb5..3c2c923ee46 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -46,13 +46,17 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_RDONLY | O_CLOEXEC : flags | O_CLOEXEC); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); #ifdef OS_DARWIN if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::CANNOT_OPEN_FILE : ErrorCodes::CANNOT_OPEN_FILE, + file_name, + "Cannot set F_NOCACHE on file {}", + file_name); } #endif } diff --git a/src/IO/MMapReadBufferFromFile.cpp b/src/IO/MMapReadBufferFromFile.cpp index 0596eba565f..d3eb11c920d 100644 --- a/src/IO/MMapReadBufferFromFile.cpp +++ b/src/IO/MMapReadBufferFromFile.cpp @@ -29,8 +29,8 @@ void MMapReadBufferFromFile::open() fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); } diff --git a/src/IO/MMappedFile.cpp b/src/IO/MMappedFile.cpp index 9e45140d5f9..7249a25decb 100644 --- a/src/IO/MMappedFile.cpp +++ b/src/IO/MMappedFile.cpp @@ -30,8 +30,8 @@ void MMappedFile::open() fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); } diff --git a/src/IO/MMappedFileDescriptor.cpp b/src/IO/MMappedFileDescriptor.cpp index 9cc1aaf656c..ebc4e7a6bbb 100644 --- a/src/IO/MMappedFileDescriptor.cpp +++ b/src/IO/MMappedFileDescriptor.cpp @@ -28,7 +28,7 @@ static size_t getFileSize(int fd) { struct stat stat_res {}; if (0 != fstat(fd, &stat_res)) - throwFromErrno("MMappedFileDescriptor: Cannot fstat.", ErrorCodes::CANNOT_STAT); + throw ErrnoException(ErrorCodes::CANNOT_STAT, "MMappedFileDescriptor: Cannot fstat"); off_t file_size = stat_res.st_size; @@ -63,8 +63,7 @@ void MMappedFileDescriptor::set(int fd_, size_t offset_, size_t length_) void * buf = mmap(nullptr, length, PROT_READ, MAP_PRIVATE, fd, offset); if (MAP_FAILED == buf) - throwFromErrno(fmt::format("MMappedFileDescriptor: Cannot mmap {}.", ReadableSize(length)), - ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw ErrnoException(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "MMappedFileDescriptor: Cannot mmap {}", ReadableSize(length)); data = static_cast(buf); @@ -88,8 +87,7 @@ void MMappedFileDescriptor::finish() return; if (0 != munmap(data, length)) - throwFromErrno(fmt::format("MMappedFileDescriptor: Cannot munmap {}.", ReadableSize(length)), - ErrorCodes::CANNOT_MUNMAP); + throw ErrnoException(ErrorCodes::CANNOT_MUNMAP, "MMappedFileDescriptor: Cannot munmap {}", ReadableSize(length)); length = 0; diff --git a/src/IO/OpenedFile.cpp b/src/IO/OpenedFile.cpp index b75e087e5c3..4677a8259db 100644 --- a/src/IO/OpenedFile.cpp +++ b/src/IO/OpenedFile.cpp @@ -30,8 +30,8 @@ void OpenedFile::open() const fd = ::open(file_name.c_str(), (flags == -1 ? 0 : flags) | O_RDONLY | O_CLOEXEC); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + DB::ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); } int OpenedFile::getFD() const diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp index 79ac62c6421..7f98c6dddfa 100644 --- a/src/IO/ReadBufferFromFile.cpp +++ b/src/IO/ReadBufferFromFile.cpp @@ -45,13 +45,17 @@ ReadBufferFromFile::ReadBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_RDONLY | O_CLOEXEC : flags | O_CLOEXEC); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); #ifdef OS_DARWIN if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath( + errno == ENOENT ? ErrorCodes::CANNOT_OPEN_FILE : ErrorCodes::CANNOT_OPEN_FILE, + file_name, + "Cannot set F_NOCACHE on file {}", + file_name); } #endif } diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 5a67dc7528c..3211f8eeb35 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -80,7 +80,8 @@ size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_ if (-1 == res && errno != EINTR) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrnoWithPath("Cannot read from file: " + getFileName(), getFileName(), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, getFileName(), "Cannot read from file {}", getFileName()); } if (res > 0) @@ -145,7 +146,7 @@ void ReadBufferFromFileDescriptor::prefetch(Priority) /// Ask OS to prefetch data into page cache. if (0 != posix_fadvise(fd, file_offset_of_buffer_end, internal_buffer.size(), POSIX_FADV_WILLNEED)) - throwFromErrno("Cannot posix_fadvise", ErrorCodes::CANNOT_ADVISE); + throw ErrnoException(ErrorCodes::CANNOT_ADVISE, "Cannot posix_fadvise"); #endif } @@ -208,8 +209,12 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) off_t res = ::lseek(fd, seek_pos, SEEK_SET); if (-1 == res) - throwFromErrnoWithPath(fmt::format("Cannot seek through file {} at offset {}", getFileName(), seek_pos), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + getFileName(), + "Cannot seek through file {} at offset {}", + getFileName(), + seek_pos); /// Also note that seeking past the file size is not allowed. if (res != seek_pos) @@ -237,8 +242,8 @@ void ReadBufferFromFileDescriptor::rewind() ProfileEvents::increment(ProfileEvents::Seek); off_t res = ::lseek(fd, 0, SEEK_SET); if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_SEEK_THROUGH_FILE, getFileName(), "Cannot seek through file {}", getFileName()); } /// In case of pread, the ProfileEvents::Seek is not accounted, but it's Ok. 
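The file-opening sites touched above (AsynchronousReadBufferFromFile, MMapReadBufferFromFile, MMappedFile, OpenedFile, ReadBufferFromFileDescriptor) converge on one shape: map errno == ENOENT to FILE_DOESNT_EXIST, anything else to CANNOT_OPEN_FILE, and report through the path-aware ErrnoException::throwFromPath(code, path, format, args...). A compact sketch of that shape under the same assumptions; the helper name openReadOnlyOrThrow and the Common/Exception.h include are illustrative only and are not introduced by this patch:

    #include <cerrno>
    #include <fcntl.h>
    #include <string>
    #include <Common/Exception.h>

    namespace DB::ErrorCodes
    {
        extern const int FILE_DOESNT_EXIST;
        extern const int CANNOT_OPEN_FILE;
    }

    /// Hypothetical helper: open a file read-only and report failure the way the hunks above do.
    static int openReadOnlyOrThrow(const std::string & file_name)
    {
        int fd = ::open(file_name.c_str(), O_RDONLY | O_CLOEXEC);
        if (-1 == fd)
            DB::ErrnoException::throwFromPath(
                errno == ENOENT ? DB::ErrorCodes::FILE_DOESNT_EXIST : DB::ErrorCodes::CANNOT_OPEN_FILE,
                file_name, "Cannot open file {}", file_name);
        return fd;
    }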
diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index 07cc2fd0778..5061439bfd6 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -43,7 +43,7 @@ std::future SynchronousReader::submit(Request reque #if defined(POSIX_FADV_WILLNEED) int fd = assert_cast(*request.descriptor).fd; if (0 != posix_fadvise(fd, request.offset, request.size, POSIX_FADV_WILLNEED)) - throwFromErrno("Cannot posix_fadvise", ErrorCodes::CANNOT_ADVISE); + throw ErrnoException(ErrorCodes::CANNOT_ADVISE, "Cannot posix_fadvise"); #endif return std::async(std::launch::deferred, [request, this] @@ -73,7 +73,7 @@ IAsynchronousReader::Result SynchronousReader::execute(Request request) if (-1 == res && errno != EINTR) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrno(fmt::format("Cannot read from file {}", fd), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot read from file {}", fd); } if (res > 0) diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 97059ff8f48..e61f22ba3e7 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -46,14 +46,13 @@ WriteBufferFromFile::WriteBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode); if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + file_name, file_name, - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); #ifdef OS_DARWIN if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrnoWithPath("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot set F_NOCACHE on file {}", file_name); } #endif } diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index 135ff608967..ff04bfdd906 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -69,8 +69,8 @@ void WriteBufferFromFileDescriptor::nextImpl() String error_file_name = file_name; if (error_file_name.empty()) error_file_name = "(fd = " + toString(fd) + ")"; - throwFromErrnoWithPath("Cannot write to file " + error_file_name, error_file_name, - ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + ErrnoException::throwFromPath( + ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR, error_file_name, "Cannot write to file {}", error_file_name); } if (res > 0) @@ -137,7 +137,7 @@ void WriteBufferFromFileDescriptor::sync() ProfileEvents::increment(ProfileEvents::FileSyncElapsedMicroseconds, watch.elapsedMicroseconds()); if (-1 == res) - throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR, getFileName(), "Cannot fsync {}", getFileName()); } @@ -145,8 +145,7 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT { off_t res = lseek(fd, offset, whence); if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, getFileName(), "Cannot seek through {}", getFileName()); return res; } @@ -154,7 +153,7 @@ void 
WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT { int res = ftruncate(fd, length); if (-1 == res) - throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_TRUNCATE_FILE, getFileName(), "Cannot truncate file {}", getFileName()); } @@ -163,7 +162,7 @@ off_t WriteBufferFromFileDescriptor::size() const struct stat buf; int res = fstat(fd, &buf); if (-1 == res) - throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_FSTAT, getFileName(), "Cannot execute fstat {}", getFileName()); return buf.st_size; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9ea94a5a8a9..52b2744b64d 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -289,7 +289,7 @@ BlockIO InterpreterSystemQuery::execute() { getContext()->checkAccess(AccessType::SYSTEM_SHUTDOWN); if (kill(0, SIGTERM)) - throwFromErrno("System call kill(0, SIGTERM) failed", ErrorCodes::CANNOT_KILL); + throw ErrnoException(ErrorCodes::CANNOT_KILL, "System call kill(0, SIGTERM) failed"); break; } case Type::KILL: diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index fc1decc8482..046d0b4fc10 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -153,7 +153,7 @@ public: { int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ); if (res != 0) - throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + throw ErrnoException(ErrorCodes::CANNOT_MPROTECT, "Cannot mprotect memory region"); llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); invalidate_cache = false; @@ -161,7 +161,7 @@ public: # endif int res = mprotect(block.base(), block.blockSize(), protection_flags); if (res != 0) - throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + throw ErrnoException(ErrorCodes::CANNOT_MPROTECT, "Cannot mprotect memory region"); if (invalidate_cache) llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); @@ -232,10 +232,12 @@ private: int res = posix_memalign(&buf, page_size, allocate_size); if (res != 0) - throwFromErrno( - fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", page_size, ReadableSize(allocate_size)), + ErrnoException::throwWithErrno( ErrorCodes::CANNOT_ALLOCATE_MEMORY, - res); + res, + "Cannot allocate memory (posix_memalign) alignment {} size {}", + page_size, + ReadableSize(allocate_size)); page_blocks.emplace_back(buf, pages_to_allocate_size, page_size); page_blocks_allocated_size.emplace_back(0); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 3189f093b50..eaab0f95725 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -221,7 +221,7 @@ void ThreadStatus::applyQuerySettings() LOG_TRACE(log, "Setting nice to {}", new_os_thread_priority); if (0 != setpriority(PRIO_PROCESS, static_cast(thread_id), new_os_thread_priority)) - throwFromErrno("Cannot 'setpriority'", ErrorCodes::CANNOT_SET_THREAD_PRIORITY); + throw ErrnoException(ErrorCodes::CANNOT_SET_THREAD_PRIORITY, "Cannot 'setpriority'"); os_thread_priority = new_os_thread_priority; } diff --git a/src/Loggers/ExtendedLogChannel.cpp b/src/Loggers/ExtendedLogChannel.cpp 
index 116892b9030..634ae489dc1 100644 --- a/src/Loggers/ExtendedLogChannel.cpp +++ b/src/Loggers/ExtendedLogChannel.cpp @@ -19,7 +19,7 @@ ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base) ::timeval tv; if (0 != gettimeofday(&tv, nullptr)) - DB::throwFromErrno("Cannot gettimeofday", ErrorCodes::CANNOT_GETTIMEOFDAY); + throw ErrnoException(ErrorCodes::CANNOT_GETTIMEOFDAY, "Cannot gettimeofday"); msg_ext.time_seconds = static_cast(tv.tv_sec); msg_ext.time_microseconds = static_cast(tv.tv_usec); diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp index 4bc0372dfbe..447c102a195 100644 --- a/src/Processors/Executors/PollingQueue.cpp +++ b/src/Processors/Executors/PollingQueue.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes PollingQueue::PollingQueue() { if (-1 == pipe2(pipe_fd, O_NONBLOCK)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE); + throw ErrnoException(ErrorCodes::CANNOT_OPEN_FILE, "Cannot create pipe"); epoll.add(pipe_fd[0], pipe_fd); } @@ -111,7 +111,7 @@ void PollingQueue::finish() break; if (errno != EINTR) - throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot write to pipe"); } } diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 7933b342dec..1f23292c6b3 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -44,7 +44,7 @@ static void makeFdNonBlocking(int fd) { bool result = tryMakeFdNonBlocking(fd); if (!result) - throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set non-blocking mode of pipe"); } static bool tryMakeFdBlocking(int fd) @@ -63,7 +63,7 @@ static void makeFdBlocking(int fd) { bool result = tryMakeFdBlocking(fd); if (!result) - throwFromErrno("Cannot set blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); + throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set blocking mode of pipe"); } static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds) @@ -78,7 +78,7 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond if (res < 0) { if (errno != EINTR) - throwFromErrno("Cannot poll", ErrorCodes::CANNOT_POLL); + throw ErrnoException(ErrorCodes::CANNOT_POLL, "Cannot poll"); const auto elapsed = watch.elapsedMilliseconds(); if (timeout_milliseconds <= elapsed) @@ -177,7 +177,7 @@ public: ssize_t res = ::read(stdout_fd, internal_buffer.begin(), internal_buffer.size()); if (-1 == res && errno != EINTR) - throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot read from pipe"); if (res == 0) break; @@ -261,7 +261,7 @@ public: ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); if ((-1 == res || 0 == res) && errno != EINTR) - throwFromErrno("Cannot write into pipe", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR, "Cannot write into pipe"); if (res > 0) bytes_written += res; diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 5e211bf036d..5a78baae53c 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ 
b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -22,7 +22,7 @@ RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(RemoteQueryExecut : AsyncTaskExecutor(std::make_unique(*this)), executor(executor_), suspend_when_query_sent(suspend_when_query_sent_) { if (-1 == pipe2(pipe_fd, O_NONBLOCK)) - throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE); + throw ErrnoException(ErrorCodes::CANNOT_OPEN_FILE, "Cannot create pipe"); epoll.add(pipe_fd[0]); epoll.add(timer.getDescriptor()); @@ -132,7 +132,7 @@ void RemoteQueryExecutorReadContext::cancelBefore() break; if (errno != EINTR) - throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot write to pipe"); } } diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 9b8fd069531..764ebcdf0b9 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -83,7 +83,7 @@ struct SocketInterruptablePollWrapper #if defined(POCO_HAVE_FD_EPOLL) epollfd = epoll_create(2); if (epollfd < 0) - throwFromErrno("Cannot epoll_create", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot epoll_create"); socket_event.events = EPOLLIN | EPOLLERR | EPOLLPRI; socket_event.data.fd = sockfd; @@ -92,7 +92,7 @@ struct SocketInterruptablePollWrapper int err = ::close(epollfd); chassert(!err || errno == EINTR); - throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot epoll_create"); } pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI; pipe_event.data.fd = pipe.fds_rw[0]; @@ -101,7 +101,7 @@ struct SocketInterruptablePollWrapper int err = ::close(epollfd); chassert(!err || errno == EINTR); - throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot insert socket into epoll queue"); } #endif } diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index deadb10f9a9..f74f57926f9 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -67,10 +67,8 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(certificate_file.data(), "r"); if (fp == nullptr) throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot open certificate file: {}.", certificate_file); - SCOPE_EXIT( - if (0 != fclose(fp)) - throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); - ); + SCOPE_EXIT(if (0 != fclose(fp)) throw ErrnoException( + ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file with the certificate in MySQLHandlerFactory");); X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); SCOPE_EXIT(X509_free(x509)); @@ -93,10 +91,8 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(private_key_file.data(), "r"); if (fp == nullptr) throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot open private key file {}.", private_key_file); - SCOPE_EXIT( - if (0 != fclose(fp)) - throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); - ); + SCOPE_EXIT(if (0 != fclose(fp)) throw ErrnoException( + ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file with the certificate in MySQLHandlerFactory");); private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); if (!private_key) diff --git a/src/Storages/FileLog/DirectoryWatcherBase.cpp 
b/src/Storages/FileLog/DirectoryWatcherBase.cpp index 45a7f43fc95..8209483fac9 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.cpp +++ b/src/Storages/FileLog/DirectoryWatcherBase.cpp @@ -36,7 +36,7 @@ DirectoryWatcherBase::DirectoryWatcherBase( fd = inotify_init(); if (fd == -1) - throwFromErrno("Cannot initialize inotify", ErrorCodes::IO_SETUP_ERROR); + throw ErrnoException(ErrorCodes::IO_SETUP_ERROR, "Cannot initialize inotify"); watch_task = getContext()->getSchedulePool().createTask("directory_watch", [this] { watchFunc(); }); start(); @@ -60,7 +60,7 @@ void DirectoryWatcherBase::watchFunc() if (wd == -1) { owner.onError(Exception(ErrorCodes::IO_SETUP_ERROR, "Watch directory {} failed", path)); - throwFromErrnoWithPath("Watch directory {} failed", path, ErrorCodes::IO_SETUP_ERROR); + ErrnoException::throwFromPath(ErrorCodes::IO_SETUP_ERROR, path, "Watch directory {} failed", path); } std::string buffer; diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index fad0447d2cf..6360bb2a3d5 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -95,8 +95,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl { int result = hdfsSync(fs.get(), fout); if (result < 0) - throwFromErrno("Cannot HDFS sync" + hdfs_uri + " " + std::string(hdfsGetLastError()), - ErrorCodes::CANNOT_FSYNC); + throw ErrnoException(ErrorCodes::CANNOT_FSYNC, "Cannot HDFS sync {} {}", hdfs_uri, std::string(hdfsGetLastError())); } }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 47b915e3ed8..1004e826845 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -299,13 +299,13 @@ struct stat getFileStat(const String & current_path, bool use_table_fd, int tabl { /// Check if file descriptor allows random reads (and reading it twice). if (0 != fstat(table_fd, &file_stat)) - throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); + throw ErrnoException(ErrorCodes::CANNOT_STAT, "Cannot stat table file descriptor, inside {}", storage_name); } else { /// Check if file descriptor allows random reads (and reading it twice). 
if (0 != stat(current_path.c_str(), &file_stat)) - throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); + throw ErrnoException(ErrorCodes::CANNOT_STAT, "Cannot stat file {}", current_path); } return file_stat; @@ -813,7 +813,7 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) struct stat buf; int res = fstat(table_fd_, &buf); if (-1 == res) - throwFromErrno("Cannot execute fstat", res, ErrorCodes::CANNOT_FSTAT); + throw ErrnoException(ErrorCodes::CANNOT_FSTAT, "Cannot execute fstat"); total_bytes_to_read = buf.st_size; if (args.getContext()->getApplicationType() == Context::ApplicationType::SERVER) @@ -1793,7 +1793,7 @@ void StorageFile::truncate( if (use_table_fd) { if (0 != ::ftruncate(table_fd, 0)) - throwFromErrno("Cannot truncate file at fd " + toString(table_fd), ErrorCodes::CANNOT_TRUNCATE_FILE); + throw ErrnoException(ErrorCodes::CANNOT_TRUNCATE_FILE, "Cannot truncate file at fd {}", toString(table_fd)); } else { @@ -1803,7 +1803,7 @@ void StorageFile::truncate( continue; if (0 != ::truncate(path.c_str(), 0)) - throwFromErrnoWithPath("Cannot truncate file " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_TRUNCATE_FILE, path, "Cannot truncate file at {}", path); } } } diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index a860930087c..1baaf5144e8 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -133,7 +133,7 @@ bool wait(int timeout_ms) continue; } - throwFromErrno("Cannot poll pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot poll pipe"); } if (poll_res == 0) return false; @@ -146,7 +146,7 @@ bool wait(int timeout_ms) if (errno == EINTR) continue; - throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, "Cannot read from pipe"); } if (read_res == sizeof(notification_num)) @@ -296,7 +296,7 @@ protected: if (ESRCH == errno) continue; - throwFromErrno("Cannot send signal with sigqueue", ErrorCodes::CANNOT_SIGQUEUE); + throw ErrnoException(ErrorCodes::CANNOT_SIGQUEUE, "Cannot send signal with sigqueue"); } /// Just in case we will wait for pipe with timeout. In case signal didn't get processed. 
@@ -402,13 +402,13 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) sa.sa_flags = SA_SIGINFO; if (sigemptyset(&sa.sa_mask)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_MANIPULATE_SIGSET); + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Cannot set signal handler"); if (sigaddset(&sa.sa_mask, sig)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_MANIPULATE_SIGSET); + throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Cannot set signal handler"); if (sigaction(sig, &sa, nullptr)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); } diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 611ca948c53..a4b579f1f7b 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -174,7 +174,7 @@ void Runner::thread(std::vector> zookee || sigaddset(&sig_set, SIGINT) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) { - DB::throwFromErrno("Cannot block signal.", DB::ErrorCodes::CANNOT_BLOCK_SIGNAL); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_BLOCK_SIGNAL, "Cannot block signal"); } while (true) From efa5724b61d284bdc0455e402e748f52e0ba7996 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 16 Dec 2023 02:46:51 +0000 Subject: [PATCH 060/137] fix test --- tests/queries/0_stateless/02178_column_function_insert_from.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02178_column_function_insert_from.sql b/tests/queries/0_stateless/02178_column_function_insert_from.sql index dc7c134b6f9..fc692ec859c 100644 --- a/tests/queries/0_stateless/02178_column_function_insert_from.sql +++ b/tests/queries/0_stateless/02178_column_function_insert_from.sql @@ -9,7 +9,7 @@ INSERT INTO TESTTABLE values (0,'0',['1']), (1,'1',['1']); SET max_threads = 1; -- There is a bug which is fixed in new analyzer. -SET max_bytes_before_external_group_by = 0; +SET max_bytes_before_external_sort = 0; SELECT attr, _id, arrayFilter(x -> (x IN (select '1')), attr_list) z FROM TESTTABLE ARRAY JOIN z AS attr ORDER BY _id LIMIT 3 BY attr; From 01f5b205175530e7189901d6a87157e2fe8a07f3 Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Wed, 13 Dec 2023 15:42:43 +0800 Subject: [PATCH 061/137] Support ORDER BY ALL --- .../statements/select/order-by.md | 16 +++++++++ .../statements/select/order-by.md | 16 +++++++++ src/Interpreters/TreeRewriter.cpp | 20 +++++++++++ src/Parsers/ASTSelectQuery.cpp | 5 ++- src/Parsers/ASTSelectQuery.h | 1 + src/Parsers/ParserSelectQuery.cpp | 35 +++++++++++-------- .../0_stateless/02943_order_by_all.reference | 9 +++++ .../0_stateless/02943_order_by_all.sql | 15 ++++++++ 8 files changed, 102 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/02943_order_by_all.reference create mode 100644 tests/queries/0_stateless/02943_order_by_all.sql diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 53bdc9041a1..264d8ed323e 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -241,6 +241,22 @@ Result: └───┴─────────┘ ``` +## ORDER BY ALL + +`ORDER BY ALL` is sorts all selected columns in ascending order. 
+ +For example: + +``` sql +SELECT a, b, c FROM t ORDER BY ALL +``` + +is the same as + +``` sql +SELECT a, b, c FROM t ORDER BY a, b, c +``` + ## Implementation Details Less RAM is used if a small enough [LIMIT](../../../sql-reference/statements/select/limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data for sorting. For distributed query processing, if [GROUP BY](../../../sql-reference/statements/select/group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can be greater than the amount of memory on a single server. diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 01f702a4b1e..3286fc9f9e7 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -61,6 +61,22 @@ sidebar_label: ORDER BY 我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。 +## ORDER BY ALL + +`ORDER BY ALL` 对所有选定的列进行升序排序。 + +示例: + +``` sql +SELECT a, b, c FROM t ORDER BY ALL +``` + +等同于: + +``` sql +SELECT a, b, c FROM t ORDER BY a, b, c +``` + ## 实现细节 {#implementation-details} 更少的RAM使用,如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。 diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c63aae32090..62d3590f4e2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -776,6 +776,22 @@ void expandGroupByAll(ASTSelectQuery * select_query) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list); } +void expandOrderByAll(ASTSelectQuery * select_query) +{ + auto order_expression_list = std::make_shared(); + + for (const auto & expr : select_query->select()->children) + { + auto elem = std::make_shared(); + elem->direction = 1; + elem->nulls_direction = 1; + elem->children.push_back(expr); + order_expression_list->children.push_back(elem); + } + + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, order_expression_list); +} + ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be aggregate functions inside the WHERE and PREWHERE. @@ -1292,6 +1308,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (select_query->group_by_all) expandGroupByAll(select_query); + // expand ORDER BY ALL + if (select_query->order_by_all) + expandOrderByAll(select_query); + /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 7c96db006c4..d2c7de18c8a 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -144,7 +144,10 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F window()->as().formatImplMultiline(s, state, frame); } - if (orderBy()) + if (order_by_all) + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? 
hilite_none : ""); + + if (!order_by_all && orderBy()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY" << (s.hilite ? hilite_none : ""); s.one_line diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 57f45a8aacd..eb171dc00ee 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -87,6 +87,7 @@ public: bool group_by_with_cube = false; bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; + bool order_by_all = false; bool limit_with_ties = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 341c1ef60b4..6c94eedc470 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -268,23 +268,30 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// ORDER BY expr ASC|DESC COLLATE 'locale' list if (s_order_by.ignore(pos, expected)) { - if (!order_list.parse(pos, order_expression_list, expected)) - return false; - - /// if any WITH FILL parse possible INTERPOLATE list - if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(), - [](auto & child) { return child->template as()->with_fill; })) + if (s_all.ignore(pos, expected)) { - if (s_interpolate.ignore(pos, expected)) + select_query->order_by_all = true; + } + else + { + if (!order_list.parse(pos, order_expression_list, expected)) + return false; + + /// if any WITH FILL parse possible INTERPOLATE list + if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(), + [](auto & child) { return child->template as()->with_fill; })) { - if (open_bracket.ignore(pos, expected)) + if (s_interpolate.ignore(pos, expected)) { - if (!interpolate_list.parse(pos, interpolate_expression_list, expected)) - return false; - if (!close_bracket.ignore(pos, expected)) - return false; - } else - interpolate_expression_list = std::make_shared(); + if (open_bracket.ignore(pos, expected)) + { + if (!interpolate_list.parse(pos, interpolate_expression_list, expected)) + return false; + if (!close_bracket.ignore(pos, expected)) + return false; + } else + interpolate_expression_list = std::make_shared(); + } } } } diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference new file mode 100644 index 00000000000..f9dcebc156f --- /dev/null +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -0,0 +1,9 @@ +abc1 3 2 +abc2 3 2 +abc3 2 3 +abc4 1 4 +abc1 1 1 +abc2 1 1 +abc3 1 1 +abc4 1 1 +abc 4 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql new file mode 100644 index 00000000000..1ddd8866533 --- /dev/null +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS order_by_all; + +CREATE TABLE order_by_all +( + a String, + b int, + c int +) +engine = Memory; + +insert into order_by_all values ('abc2', 3, 2), ('abc3', 2, 3), ('abc4', 1, 4), ('abc1', 3, 2); + +select a, b, c from order_by_all order by all; +select a, count(b), count(c) from order_by_all group by all order by all; +select substring(a, 1, 3), count(b) from order_by_all group by all order by all; From b0e17f8b999068818846553f1ea68c068ebf6b63 Mon Sep 17 00:00:00 2001 From: ubuntu <872237106@qq.com> Date: Fri, 15 Dec 2023 11:32:07 +0800 Subject: [PATCH 062/137] fix test --- 
.../0_stateless/02943_order_by_all.reference | 14 ++++++++------ tests/queries/0_stateless/02943_order_by_all.sql | 7 ++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index f9dcebc156f..9760e4732da 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -1,9 +1,11 @@ abc1 3 2 +abc2 1 1 abc2 3 2 abc3 2 3 -abc4 1 4 -abc1 1 1 -abc2 1 1 -abc3 1 1 -abc4 1 1 -abc 4 +1 abc1 1 +1 abc3 1 +2 abc2 2 +abc 1 1 +abc 2 3 +abc 3 2 +abc 3 2 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 1ddd8866533..1039203f4df 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -8,8 +8,9 @@ CREATE TABLE order_by_all ) engine = Memory; -insert into order_by_all values ('abc2', 3, 2), ('abc3', 2, 3), ('abc4', 1, 4), ('abc1', 3, 2); +insert into order_by_all values ('abc2', 3, 2), ('abc3', 2, 3), ('abc2', 1, 1), ('abc1', 3, 2); select a, b, c from order_by_all order by all; -select a, count(b), count(c) from order_by_all group by all order by all; -select substring(a, 1, 3), count(b) from order_by_all group by all order by all; +select count(b), a, count(c) from order_by_all group by all order by all; +select substring(a, 1, 3), b, c from order_by_all order by all; + From 2cfb543b33e0dc0854ac5026dda73574c0199617 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 15 Dec 2023 14:13:08 +0000 Subject: [PATCH 063/137] Doc updates --- .../statements/select/order-by.md | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 264d8ed323e..37e42f261fa 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -5,12 +5,20 @@ sidebar_label: ORDER BY # ORDER BY Clause -The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive. +The `ORDER BY` clause contains -If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). +- a list of expressions, e.g. `ORDER BY visits, search_phrase`, +- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or +- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`. -Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time). -If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well. +To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. 
+ +Sort expressions or column numbers in `ORDER BY` can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. +If no sort order is specified explicitly, `ASC` is used as default. +The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`. Sorting is performed case-sensitively. + +Rows with identical values for a sort expressions are returned in an arbitrary and non-deterministic order. +If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic. ## Sorting of Special Values @@ -241,22 +249,6 @@ Result: └───┴─────────┘ ``` -## ORDER BY ALL - -`ORDER BY ALL` is sorts all selected columns in ascending order. - -For example: - -``` sql -SELECT a, b, c FROM t ORDER BY ALL -``` - -is the same as - -``` sql -SELECT a, b, c FROM t ORDER BY a, b, c -``` - ## Implementation Details Less RAM is used if a small enough [LIMIT](../../../sql-reference/statements/select/limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data for sorting. For distributed query processing, if [GROUP BY](../../../sql-reference/statements/select/group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can be greater than the amount of memory on a single server. From 5ccc5b4fd825887ba229395256739a79848dacf8 Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Sat, 16 Dec 2023 17:59:50 +0800 Subject: [PATCH 064/137] batter --- src/Core/Settings.h | 1 + src/Interpreters/TreeRewriter.cpp | 22 ++++++-- src/Parsers/ASTSelectQuery.cpp | 21 ++++++-- src/Parsers/ParserSelectQuery.cpp | 50 +++++++++++-------- .../0_stateless/02943_order_by_all.reference | 31 ++++++++---- .../0_stateless/02943_order_by_all.sql | 19 ++++--- 6 files changed, 99 insertions(+), 45 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 69efedf5d3e..9d09ff42395 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -845,6 +845,7 @@ class IColumn; M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ + M(Bool, enable_order_by_all, true, "Clause ORDER BY supports specifying ALL, sorts by all columns in the SELECT clause.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
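For reference, the behaviour that `enable_order_by_all` gates, and that the TreeRewriter and parser changes below implement, can be sketched with a few queries. The table and data here are hypothetical, mirroring the 02943_order_by_all test added in this patch:

```sql
CREATE TABLE t (a String, b Nullable(Int32)) ENGINE = Memory;
INSERT INTO t VALUES ('B', 3), ('C', NULL), ('A', 2);

-- ORDER BY ALL expands to an ORDER BY over every column of the SELECT list;
-- the ASC/DESC and NULLS FIRST/LAST modifiers written after ALL are copied
-- to every expanded element.
SELECT a, b FROM t ORDER BY ALL;              -- same as ORDER BY a, b
SELECT a, b FROM t ORDER BY ALL DESC;         -- same as ORDER BY a DESC, b DESC
SELECT b, a FROM t ORDER BY ALL NULLS FIRST;  -- same as ORDER BY b NULLS FIRST, a NULLS FIRST

-- A column or alias literally named "all" makes the expansion ambiguous,
-- so the rewriter throws UNEXPECTED_EXPRESSION unless the setting is disabled.
SELECT a, b AS all FROM t ORDER BY all;       -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM t ORDER BY all SETTINGS enable_order_by_all = false;  -- sorts by the "all" column
```

Copying the modifiers from the single `ALL` element is why expandOrderByAll below reads `direction`, `nulls_direction` and `nulls_direction_was_explicitly_specified` from the parsed element instead of hard-coding ascending order.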
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 62d3590f4e2..27f0bf2502e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -73,6 +73,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_IDENTIFIER; + extern const int UNEXPECTED_EXPRESSION; } namespace @@ -778,13 +779,28 @@ void expandGroupByAll(ASTSelectQuery * select_query) void expandOrderByAll(ASTSelectQuery * select_query) { + auto * all_elem = select_query->orderBy()->children[0]->as(); + if (!all_elem) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not order by asts."); + auto order_expression_list = std::make_shared(); for (const auto & expr : select_query->select()->children) { + if (auto * identifier = expr->as(); identifier) + if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "The column name (all/ALL) conflicts with `ORDER BY ALL`, try to disable setting `enable_order_by_all`."); + + if (auto * function = expr->as(); function) + if (Poco::toUpper(function->alias) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "The column name (all/ALL) conflicts with `ORDER BY ALL`, try to disable setting `enable_order_by_all`."); + auto elem = std::make_shared(); - elem->direction = 1; - elem->nulls_direction = 1; + elem->direction = all_elem->direction; + elem->nulls_direction = all_elem->nulls_direction; + elem->nulls_direction_was_explicitly_specified = all_elem->nulls_direction_was_explicitly_specified; elem->children.push_back(expr); order_expression_list->children.push_back(elem); } @@ -1309,7 +1325,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( expandGroupByAll(select_query); // expand ORDER BY ALL - if (select_query->order_by_all) + if (settings.enable_order_by_all && select_query->order_by_all) expandOrderByAll(select_query); /// Remove unneeded columns according to 'required_result_columns'. diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index d2c7de18c8a..2115de1c124 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -144,9 +144,6 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F window()->as().formatImplMultiline(s, state, frame); } - if (order_by_all) - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : ""); - if (!order_by_all && orderBy()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY" << (s.hilite ? hilite_none : ""); @@ -166,6 +163,24 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F } } + if (order_by_all) + { + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : ""); + + auto * elem = orderBy()->children[0]->as(); + s.ostr << (s.hilite ? hilite_keyword : "") + << (elem->direction == -1 ? " DESC" : " ASC") + << (s.hilite ? hilite_none : ""); + + if (elem->nulls_direction_was_explicitly_specified) + { + s.ostr << (s.hilite ? hilite_keyword : "") + << " NULLS " + << (elem->nulls_direction == elem->direction ? "LAST" : "FIRST") + << (s.hilite ? hilite_none : ""); + } + } + if (limitByLength()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 6c94eedc470..911ed546293 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -49,6 +50,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_having("HAVING"); ParserKeyword s_window("WINDOW"); ParserKeyword s_order_by("ORDER BY"); + ParserKeyword ascending("ASCENDING"); + ParserKeyword descending("DESCENDING"); + ParserKeyword asc("ASC"); + ParserKeyword desc("DESC"); + ParserKeyword nulls("NULLS"); + ParserKeyword first("FIRST"); + ParserKeyword last("LAST"); ParserKeyword s_limit("LIMIT"); ParserKeyword s_settings("SETTINGS"); ParserKeyword s_by("BY"); @@ -268,32 +276,32 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// ORDER BY expr ASC|DESC COLLATE 'locale' list if (s_order_by.ignore(pos, expected)) { - if (s_all.ignore(pos, expected)) - { - select_query->order_by_all = true; - } - else - { - if (!order_list.parse(pos, order_expression_list, expected)) - return false; + if (!order_list.parse(pos, order_expression_list, expected)) + return false; - /// if any WITH FILL parse possible INTERPOLATE list - if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(), - [](auto & child) { return child->template as()->with_fill; })) + /// if any WITH FILL parse possible INTERPOLATE list + if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(), + [](auto & child) { return child->template as()->with_fill; })) + { + if (s_interpolate.ignore(pos, expected)) { - if (s_interpolate.ignore(pos, expected)) + if (open_bracket.ignore(pos, expected)) { - if (open_bracket.ignore(pos, expected)) - { - if (!interpolate_list.parse(pos, interpolate_expression_list, expected)) - return false; - if (!close_bracket.ignore(pos, expected)) - return false; - } else - interpolate_expression_list = std::make_shared(); - } + if (!interpolate_list.parse(pos, interpolate_expression_list, expected)) + return false; + if (!close_bracket.ignore(pos, expected)) + return false; + } else + interpolate_expression_list = std::make_shared(); } } + else if (order_expression_list->children.size() == 1) + { + /// ORDER BY ALL [ASC|DESC] [NULLS [FIRST|LAST]] + auto * identifier = order_expression_list->children[0]->as()->children[0]->as(); + if (Poco::toUpper(identifier->name()) == "ALL") + select_query->order_by_all = true; + } } /// This is needed for TOP expression, because it can also use WITH TIES. 
diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 9760e4732da..1d573800619 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -1,11 +1,20 @@ -abc1 3 2 -abc2 1 1 -abc2 3 2 -abc3 2 3 -1 abc1 1 -1 abc3 1 -2 abc2 2 -abc 1 1 -abc 2 3 -abc 3 2 -abc 3 2 +A 3 +B 1 +B 3 +C \N +B 1 1 +A 3 2 +B 3 2 +C \N 3 +B 1 1 +B 3 2 +A 3 2 +C \N 3 +C \N +B 3 +B 1 +A 3 +\N C +1 B +3 A +3 B diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 1039203f4df..856eff586a7 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -3,14 +3,19 @@ DROP TABLE IF EXISTS order_by_all; CREATE TABLE order_by_all ( a String, - b int, - c int + b Nullable(Int32), + all int, ) -engine = Memory; + engine = Memory; -insert into order_by_all values ('abc2', 3, 2), ('abc3', 2, 3), ('abc2', 1, 1), ('abc1', 3, 2); +INSERT INTO order_by_all VALUES ('B', 3, 2), ('C', NULL, 3), ('B', 1, 1), ('A', 3, 2); -select a, b, c from order_by_all order by all; -select count(b), a, count(c) from order_by_all group by all order by all; -select substring(a, 1, 3), b, c from order_by_all order by all; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all, a; +SELECT a, b, all FROM order_by_all ORDER BY all settings enable_order_by_all = false; +SELECT a, b FROM order_by_all ORDER BY ALL DESC; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; + +DROP TABLE IF EXISTS order_by_all; From 0e8b26d84f68b3e8d515e02584c49f0ace853b24 Mon Sep 17 00:00:00 2001 From: ubuntu <872237106@qq.com> Date: Sun, 17 Dec 2023 11:21:46 +0800 Subject: [PATCH 065/137] fix test --- src/Parsers/ParserSelectQuery.cpp | 9 +--- .../0_stateless/02943_order_by_all.reference | 42 +++++++++++-------- .../0_stateless/02943_order_by_all.sql | 9 ++-- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 911ed546293..74321434262 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -50,13 +50,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_having("HAVING"); ParserKeyword s_window("WINDOW"); ParserKeyword s_order_by("ORDER BY"); - ParserKeyword ascending("ASCENDING"); - ParserKeyword descending("DESCENDING"); - ParserKeyword asc("ASC"); - ParserKeyword desc("DESC"); - ParserKeyword nulls("NULLS"); - ParserKeyword first("FIRST"); - ParserKeyword last("LAST"); ParserKeyword s_limit("LIMIT"); ParserKeyword s_settings("SETTINGS"); ParserKeyword s_by("BY"); @@ -281,7 +274,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// if any WITH FILL parse possible INTERPOLATE list if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(), - [](auto & child) { return child->template as()->with_fill; })) + [](auto & child) { return child->template as()->with_fill; })) { if (s_interpolate.ignore(pos, expected)) { diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 1d573800619..17867ec7f18 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ 
b/tests/queries/0_stateless/02943_order_by_all.reference @@ -1,20 +1,28 @@ -A 3 -B 1 +A 2 B 3 C \N -B 1 1 -A 3 2 -B 3 2 -C \N 3 -B 1 1 -B 3 2 -A 3 2 -C \N 3 -C \N -B 3 -B 1 -A 3 -\N C -1 B -3 A +D 1 +1 D +2 A +3 B +\N C +B 3 10 +D 1 20 +A 2 30 +C \N 40 +D 1 +A 2 +B 3 +C \N +B 3 10 +D 1 20 +A 2 30 +C \N 40 +D 1 +C \N +B 3 +A 2 +\N C +1 D +2 A 3 B diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 856eff586a7..e1cfabed8cf 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -6,14 +6,17 @@ CREATE TABLE order_by_all b Nullable(Int32), all int, ) - engine = Memory; +engine = Memory; -INSERT INTO order_by_all VALUES ('B', 3, 2), ('C', NULL, 3), ('B', 1, 1), ('A', 3, 2); +INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT b, a FROM order_by_all ORDER BY ALL; SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all, a; +SELECT a, b as all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b, all FROM order_by_all ORDER BY all settings enable_order_by_all = false; +SELECT a, b as all FROM order_by_all ORDER BY all settings enable_order_by_all = false; +SELECT a, b, all FROM order_by_all ORDER BY all, a; SELECT a, b FROM order_by_all ORDER BY ALL DESC; SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; From c696c0bfe704f3007767b4e7533507a0859f606a Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Thu, 14 Dec 2023 10:02:21 +0200 Subject: [PATCH 066/137] S3Common.AuthSettings: Allow passing SESSION_TOKEN to AWSCredentials This sets the infrastructure of loading session_token and passing it directly to all AWSCredentials instances that are created using the AuthSettings. 
The default SESSION_TOKEN is set to an empty string as documented in AWS SDK reference: https://sdk.amazonaws.com/cpp/api/0.12.9/d4/d27/class_aws_1_1_auth_1_1_a_w_s_credentials.html --- src/Coordination/KeeperSnapshotManagerS3.cpp | 2 +- src/IO/S3Common.cpp | 6 +++++- src/IO/S3Common.h | 1 + src/Storages/StorageS3.cpp | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index d76e310f2a3..a245ccc16df 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -76,7 +76,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo LOG_INFO(log, "S3 configuration was updated"); - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); auto headers = auth_settings.headers; static constexpr size_t s3_max_redirects = 10; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index ffd6b6d711f..96ad6413ef5 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -109,6 +109,8 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const { auto access_key_id = config.getString(config_elem + ".access_key_id", ""); auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); + auto session_token = config.getString(config_elem + ".session_token", ""); + auto region = config.getString(config_elem + ".region", ""); auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); @@ -133,7 +135,7 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const return AuthSettings { - std::move(access_key_id), std::move(secret_access_key), + std::move(access_key_id), std::move(secret_access_key), std::move(session_token), std::move(region), std::move(server_side_encryption_customer_key_base64), std::move(sse_kms_config), @@ -155,6 +157,8 @@ void AuthSettings::updateFrom(const AuthSettings & from) access_key_id = from.access_key_id; if (!from.secret_access_key.empty()) secret_access_key = from.secret_access_key; + if (!from.session_token.empty()) + session_token = from.session_token; headers = from.headers; region = from.region; diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 8c45c1c34a7..ebfc07a3976 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -80,6 +80,7 @@ struct AuthSettings std::string access_key_id; std::string secret_access_key; + std::string session_token; std::string region; std::string server_side_encryption_customer_key_base64; ServerSideEncryptionKMSConfig server_side_encryption_kms_config; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 556b4f5655b..e8f460525db 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1460,7 +1460,7 @@ void StorageS3::Configuration::connect(ContextPtr context) client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); client = S3::ClientFactory::instance().create( client_configuration, url.is_virtual_hosted_style, From 
9033c96e29af74525b42647dc6df7489a3f107f1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 17 Dec 2023 08:11:30 +0000 Subject: [PATCH 067/137] Some fixups --- docs/en/operations/settings/settings.md | 35 +++++++++++++++++ .../statements/select/order-by.md | 8 ++-- src/Core/Settings.h | 2 +- src/Interpreters/TreeRewriter.cpp | 8 ++-- .../0_stateless/02943_order_by_all.reference | 34 ++++++++++++----- .../0_stateless/02943_order_by_all.sql | 38 +++++++++++++++---- 6 files changed, 100 insertions(+), 25 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5c509058cbb..7491699bf0d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4152,6 +4152,41 @@ Result: └─────┴─────┴───────┘ ``` +## enable_order_by_all {#enable-order-by-all} + +Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md) + +Possible values: + +- 0 — Disable ORDER BY ALL. +- 1 — Enable ORDER BY ALL. + +Default value: `1`. + +**Example** + +Query: + +```sql +CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory(); + +INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); + +SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous + +SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all; +``` + +Result: + +```text +┌─C1─┬─C2─┬─ALL─┐ +│ 20 │ 20 │ 10 │ +│ 30 │ 10 │ 20 │ +│ 10 │ 20 │ 30 │ +└────┴────┴─────┘ +``` + ## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string} Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 37e42f261fa..13b05e71161 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -12,10 +12,12 @@ The `ORDER BY` clause contains - `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`. To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. +To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0. -Sort expressions or column numbers in `ORDER BY` can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. -If no sort order is specified explicitly, `ASC` is used as default. -The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`. Sorting is performed case-sensitively. +The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. +Unless an explicit sort order is specified, `ASC` is used by default. +The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`. +Also, sorting is performed case-sensitively. Rows with identical values for a sort expressions are returned in an arbitrary and non-deterministic order. If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic. 
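The three forms now listed in the reworked ORDER BY docs above can be illustrated side by side. This is a sketch with a hypothetical table, assuming the defaults `enable_positional_arguments = 1` and `enable_order_by_all = 1`:

```sql
CREATE TABLE hits (search_phrase String, visits UInt64) ENGINE = Memory;
INSERT INTO hits VALUES ('b', 10), ('a', 10), ('c', 5);

SELECT search_phrase, visits FROM hits ORDER BY visits DESC, search_phrase;  -- list of expressions
SELECT search_phrase, visits FROM hits ORDER BY 2 DESC, 1;                   -- column numbers of the SELECT clause
SELECT search_phrase, visits FROM hits ORDER BY ALL;                         -- all SELECT columns, ascending by default
```

Each form can be switched off independently: column numbers via `enable_positional_arguments = 0`, `ALL` via `enable_order_by_all = 0`, as described in the settings documentation added in this patch.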
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d09ff42395..9c3dafd257d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -845,7 +845,7 @@ class IColumn; M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ - M(Bool, enable_order_by_all, true, "Clause ORDER BY supports specifying ALL, sorts by all columns in the SELECT clause.", 0)\ + M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 27f0bf2502e..9cbf24091e3 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -787,15 +787,15 @@ void expandOrderByAll(ASTSelectQuery * select_query) for (const auto & expr : select_query->select()->children) { - if (auto * identifier = expr->as(); identifier) + if (auto * identifier = expr->as(); identifier != nullptr) if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL") throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "The column name (all/ALL) conflicts with `ORDER BY ALL`, try to disable setting `enable_order_by_all`."); + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); - if (auto * function = expr->as(); function) + if (auto * function = expr->as(); function != nullptr) if (Poco::toUpper(function->alias) == "ALL") throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "The column name (all/ALL) conflicts with `ORDER BY ALL`, try to disable setting `enable_order_by_all`."); + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); auto elem = std::make_shared(); elem->direction = all_elem->direction; diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 17867ec7f18..f60c7976ae6 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -1,3 +1,4 @@ +-- no modifiers A 2 B 3 C \N @@ -6,23 +7,38 @@ D 1 2 A 3 B \N C -B 3 10 -D 1 20 -A 2 30 -C \N 40 -D 1 +-- with ASC/DESC modifiers A 2 B 3 C \N -B 3 10 -D 1 20 -A 2 30 -C \N 40 +D 1 D 1 C \N B 3 A 2 +-- with NULLS FIRST/LAST modifiers \N C 1 D 2 A 3 B +1 D +2 A +3 B +\N C +-- what happens if some column "all" already exists? 
+B 3 10 +D 1 20 +A 2 30 +C \N 40 +D 1 +A 2 +B 3 +C \N +A 2 +B 3 +D 1 +\N +B 3 10 +D 1 20 +A 2 30 +C \N 40 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index e1cfabed8cf..c1e358178d5 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -1,24 +1,46 @@ +-- Tests that sort expression ORDER BY ALL + DROP TABLE IF EXISTS order_by_all; CREATE TABLE order_by_all ( a String, b Nullable(Int32), - all int, + all UInt64, ) -engine = Memory; +ENGINE = Memory; INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); +SELECT '-- no modifiers'; SELECT a, b FROM order_by_all ORDER BY ALL; SELECT b, a FROM order_by_all ORDER BY ALL; -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b as all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all settings enable_order_by_all = false; -SELECT a, b as all FROM order_by_all ORDER BY all settings enable_order_by_all = false; -SELECT a, b, all FROM order_by_all ORDER BY all, a; + +SELECT '-- with ASC/DESC modifiers'; +SELECT a, b FROM order_by_all ORDER BY ALL ASC; SELECT a, b FROM order_by_all ORDER BY ALL DESC; + +SELECT '-- with NULLS FIRST/LAST modifiers'; SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; -DROP TABLE IF EXISTS order_by_all; +SELECT '-- what happens if some column "all" already exists?'; +-- columns +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +-- column aliases +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +-- expressions +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SELECT a, b, all FROM order_by_all ORDER BY all, a; + +DROP TABLE order_by_all; From 41da561e3a6d2af1cd9416e0d57c72cf02590f9d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 17 Dec 2023 10:16:25 +0000 Subject: [PATCH 068/137] Fix crash --- src/Parsers/ParserSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 74321434262..641e74b5f18 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -290,9 +290,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (order_expression_list->children.size() == 1) { - /// ORDER BY ALL [ASC|DESC] [NULLS [FIRST|LAST]] + /// ORDER BY ALL auto * identifier = order_expression_list->children[0]->as()->children[0]->as(); - if (Poco::toUpper(identifier->name()) == "ALL") + if (identifier != nullptr && Poco::toUpper(identifier->name()) == "ALL") select_query->order_by_all = true; } } From 
19c8ac567f2e92c46d2a3ef86d9d293288143d28 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 17 Dec 2023 13:51:13 +0000 Subject: [PATCH 069/137] Implement punycodeEncode()/Decode() --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/idna | 1 + contrib/idna-cmake/CMakeLists.txt | 24 +++ .../functions/string-functions.md | 65 ++++++ src/Common/config.h.in | 1 + src/Functions/CMakeLists.txt | 4 + src/Functions/punycode.cpp | 199 ++++++++++++++++++ src/configure_config.cmake | 3 + .../0_stateless/02932_punycode.reference | 35 +++ tests/queries/0_stateless/02932_punycode.sql | 63 ++++++ 11 files changed, 399 insertions(+) create mode 160000 contrib/idna create mode 100644 contrib/idna-cmake/CMakeLists.txt create mode 100644 src/Functions/punycode.cpp create mode 100644 tests/queries/0_stateless/02932_punycode.reference create mode 100644 tests/queries/0_stateless/02932_punycode.sql diff --git a/.gitmodules b/.gitmodules index 53ef899dd99..3b9faea3cc1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -360,3 +360,6 @@ [submodule "contrib/sqids-cpp"] path = contrib/sqids-cpp url = https://github.com/sqids/sqids-cpp.git +[submodule "contrib/idna"] + path = contrib/idna + url = https://github.com/ada-url/idna.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1b5ba15187f..02cb19d4c07 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx) add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) +add_contrib (idna-cmake idna) add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (incbin-cmake incbin) add_contrib (sqids-cpp-cmake sqids-cpp) diff --git a/contrib/idna b/contrib/idna new file mode 160000 index 00000000000..3c8be01d42b --- /dev/null +++ b/contrib/idna @@ -0,0 +1 @@ +Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667 diff --git a/contrib/idna-cmake/CMakeLists.txt b/contrib/idna-cmake/CMakeLists.txt new file mode 100644 index 00000000000..1138b836192 --- /dev/null +++ b/contrib/idna-cmake/CMakeLists.txt @@ -0,0 +1,24 @@ +option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES}) +if ((NOT ENABLE_IDNA)) + message (STATUS "Not using idna") + return() +endif() +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna") + +set (SRCS + "${LIBRARY_DIR}/src/idna.cpp" + "${LIBRARY_DIR}/src/mapping.cpp" + "${LIBRARY_DIR}/src/mapping_tables.cpp" + "${LIBRARY_DIR}/src/normalization.cpp" + "${LIBRARY_DIR}/src/normalization_tables.cpp" + "${LIBRARY_DIR}/src/punycode.cpp" + "${LIBRARY_DIR}/src/to_ascii.cpp" + "${LIBRARY_DIR}/src/to_unicode.cpp" + "${LIBRARY_DIR}/src/unicode_transcoding.cpp" + "${LIBRARY_DIR}/src/validity.cpp" +) + +add_library (_idna ${SRCS}) +target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include") + +add_library (ch_contrib::idna ALIAS _idna) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 4f3c6e1e858..e3f5c037839 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1383,6 +1383,71 @@ Result: └──────────────────┘ ``` +## punycodeEncode + +Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string. +The string must be UTF8-encoded, otherwise results are undefined. + +**Syntax** + +``` sql +punycodeEncode(val) +``` + +**Arguments** + +- `val` - Input value. 
[String](../data-types/string.md) + +**Returned value** + +- A Punycode representation of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeEncode('München'); +``` + +Result: + +```result +┌─punycodeEncode('München')─┐ +│ Mnchen-3ya │ +└───────────────────────────┘ +``` + +## punycodeDecode + +Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string. + +**Syntax** + +``` sql +punycodeEncode(val) +``` + +**Arguments** + +- `val` - Punycode-encoded string. [String](../data-types/string.md) + +**Returned value** + +- The plaintext of the input value. [String](../data-types/string.md) + +**Example** + +``` sql +select punycodeDecode('Mnchen-3ya'); +``` + +Result: + +```result +┌─punycodeEncode('Mnchen-3ya')─┐ +│ München │ +└──────────────────────────────┘ +``` + ## byteHammingDistance Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings. diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f84e28942c5..5b3388a3b7d 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -28,6 +28,7 @@ #cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_FASTOPS #cmakedefine01 USE_SQIDS +#cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_LIBURING diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 89676594581..a06e898b7c5 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids) list (APPEND PRIVATE_LIBS ch_contrib::sqids) endif() +if (TARGET ch_contrib::idna) + list (APPEND PRIVATE_LIBS ch_contrib::idna) +endif() + if (TARGET ch_contrib::h3) list (APPEND PRIVATE_LIBS ch_contrib::h3) endif() diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp new file mode 100644 index 00000000000..5279c1d7312 --- /dev/null +++ b/src/Functions/punycode.cpp @@ -0,0 +1,199 @@ +#include "config.h" + +#ifdef USE_IDNA + +#include +#include + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wnewline-eof" +#endif +# include +# include +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +struct PunycodeEncodeImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.resize(rows * 64); /// just a guess + res_offsets.resize(rows); + + size_t prev_offset = 0; + size_t prev_res_offset = 0; + size_t res_data_bytes_written = 0; + std::u32string value_utf32; + std::string value_puny; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length); + value_utf32.resize(value_utf32_length, '\0'); + + ada::idna::utf8_to_utf32(value, value_length, value_utf32.data()); + + bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny); + if (!ok) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding"); + + const size_t bytes_to_write = value_puny.size() + 1; + if (res_data_bytes_written + bytes_to_write > res_data.size()) + { + size_t new_size = std::max(res_data.size() * 2, 
res_data_bytes_written + bytes_to_write); + res_data.resize(new_size); + } + + std::memcpy(&res_data[res_data_bytes_written], value_puny.data(), value_puny.size()); + res_data_bytes_written += value_puny.size(); + + res_data[res_data_bytes_written] = '\0'; + res_data_bytes_written += 1; + + res_offsets[row] = prev_res_offset + bytes_to_write; + + prev_offset = offsets[row]; + prev_res_offset = res_offsets[row]; + value_utf32.clear(); + value_puny.clear(); + } + + res_data.resize(res_data_bytes_written); + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function"); + } +}; + +struct PunycodeDecodeImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const size_t rows = offsets.size(); + res_data.resize(rows * 64); /// just a guess + res_offsets.resize(rows); + + size_t prev_offset = 0; + size_t prev_res_offset = 0; + size_t res_data_bytes_written = 0; + std::u32string value_utf32; + std::string value_utf8; + for (size_t row = 0; row < rows; ++row) + { + const char * value = reinterpret_cast(&data[prev_offset]); + const size_t value_length = offsets[row] - prev_offset - 1; + + std::string_view value_punycode(value, value_length); + bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32); + if (!ok) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding"); + + size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size()); + value_utf8.resize(utf8_length, '\0'); + + ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data()); + + const size_t bytes_to_write = value_utf8.size() + 1; + if (res_data_bytes_written + bytes_to_write > res_data.size()) + { + size_t new_size = std::max(res_data.size() * 2, res_data_bytes_written + bytes_to_write); + res_data.resize(new_size); + } + + std::memcpy(&res_data[res_data_bytes_written], value_utf8.data(), value_utf8.size()); + res_data_bytes_written += value_utf8.size(); + + res_data[res_data_bytes_written] = '\0'; + res_data_bytes_written += 1; + + res_offsets[row] = prev_res_offset + bytes_to_write; + + prev_offset = offsets[row]; + prev_res_offset = res_offsets[row]; + value_utf32.clear(); + value_utf8.clear(); + } + + res_data.resize(res_data_bytes_written); + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function"); + } +}; + +struct NamePunycodeEncode +{ + static constexpr auto name = "punycodeEncode"; +}; + +struct NamePunycodeDecode +{ + static constexpr auto name = "punycodeDecode"; +}; + +REGISTER_FUNCTION(Punycode) +{ + factory.registerFunction>(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string.)", + .syntax="punycodeEncode(str)", + .arguments={{"str", "Input string"}}, + .returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeEncode('München') AS puny;", + R"( +┌─puny───────┐ +│ Mnchen-3ya │ +└────────────┘ + )" + }} + }); + + factory.registerFunction>(FunctionDocumentation{ + .description=R"( +Computes a Punycode representation of a string.)", + 
.syntax="punycodeDecode(str)", + .arguments={{"str", "A Punycode-encoded string"}}, + .returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).", + .examples={ + {"simple", + "SELECT punycodeDecode('Mnchen-3ya') AS plain;", + R"( +┌─plain───┐ +│ München │ +└─────────┘ + )" + }} + }); +} + +} + +#endif diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c3c6d9be6da..01c4fd7b5f8 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -131,6 +131,9 @@ endif() if (TARGET ch_contrib::sqids) set(USE_SQIDS 1) endif() +if (TARGET ch_contrib::idna) + set(USE_IDNA 1) +endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() diff --git a/tests/queries/0_stateless/02932_punycode.reference b/tests/queries/0_stateless/02932_punycode.reference new file mode 100644 index 00000000000..7a39a221e08 --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.reference @@ -0,0 +1,35 @@ +-- Negative tests +-- Regular cases +a a- a +A A- A +-- --- -- +London London- London +Lloyd-Atkinson Lloyd-Atkinson- Lloyd-Atkinson +This has spaces This has spaces- This has spaces +-> $1.00 <- -> $1.00 <-- -> $1.00 <- +а 80a а +ü tda ü +α mxa α +例 fsq 例 +😉 n28h 😉 +αβγ mxacd αβγ +München Mnchen-3ya München +Mnchen-3ya Mnchen-3ya- Mnchen-3ya +München-Ost Mnchen-Ost-9db München-Ost +Bahnhof München-Ost Bahnhof Mnchen-Ost-u6b Bahnhof München-Ost +abæcdöef abcdef-qua4k abæcdöef +правда 80aafi6cg правда +ยจฆฟคฏข 22cdfh1b8fsa ยจฆฟคฏข +ドメイン名例 eckwd4c7cu47r2wf ドメイン名例 +MajiでKoiする5秒前 MajiKoi5-783gue6qz075azm5e MajiでKoiする5秒前 +「bücher」 bcher-kva8445foa 「bücher」 +团淄 3bs854c 团淄 +-- Special cases + + +\N +\N +Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. Wenn Sie ... vom Hauptbahnhof in Mnchen ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken mssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in Mnchen starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die groen Flughfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle h in Frankreich oder in ...h... in ... in...h...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... 
an den Flughafen Franz Josef Strau. Dann starten Sie praktisch hier am Hauptbahnhof in Mnchen. Das bedeutet natrlich, dass der Hauptbahnhof im Grunde genommen nher an Bayern ... an die bayerischen Stdte heranwchst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.-pu7fjtp0npc1ar54cibk471wdc9d18axa Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen. +München Mnchen-3ya München +abc abc- abc +aäoöuü aou-qla5gqb aäoöuü diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql new file mode 100644 index 00000000000..fd128507a8f --- /dev/null +++ b/tests/queries/0_stateless/02932_punycode.sql @@ -0,0 +1,63 @@ +-- Tags: no-fasttest +-- no-fasttest: requires idna library + +SELECT '-- Negative tests'; + +SELECT punycodeDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeEncode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT punycodeDecode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeEncode('two', 'strings'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT punycodeDecode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN } +SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN } + +SELECT '-- Regular cases'; + +-- The test cases originate from the idna unit tests: +--- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt + +SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'A' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '--' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'London' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Lloyd-Atkinson' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'This has spaces' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '-> $1.00 <-' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'а' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ü' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'α' AS str, punycodeEncode(str) AS puny, 
punycodeDecode(puny) AS original; +SELECT '例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '😉' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'αβγ' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'München' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Mnchen-3ya' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'Bahnhof München-Ost' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'abæcdöef' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'правда' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ยจฆฟคฏข' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'ドメイン名例' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT 'MajiでKoiする5秒前' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '「bücher」' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; +SELECT '团淄' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; + +SELECT '-- Special cases'; + +SELECT punycodeDecode(''); +SELECT punycodeEncode(''); +SELECT punycodeDecode(NULL); +SELECT punycodeEncode(NULL); + +-- garbage Punycode-encoded values +SELECT punycodeDecode('no punycode'); -- { serverError BAD_ARGUMENTS } + +-- long input +SELECT 'Wenn Sie ... vom Hauptbahnhof in München ... mit zehn Minuten, ohne, dass Sie am Flughafen noch einchecken müssen, dann starten Sie im Grunde genommen am Flughafen ... am ... am Hauptbahnhof in München starten Sie Ihren Flug. Zehn Minuten. Schauen Sie sich mal die großen Flughäfen an, wenn Sie in Heathrow in London oder sonst wo, meine se ... Charles de Gaulle äh in Frankreich oder in ...äh... in ... in...äh...in Rom. Wenn Sie sich mal die Entfernungen ansehen, wenn Sie Frankfurt sich ansehen, dann werden Sie feststellen, dass zehn Minuten... Sie jederzeit locker in Frankfurt brauchen, um ihr Gate zu finden. Wenn Sie vom Flug ... vom ... vom Hauptbahnhof starten - Sie steigen in den Hauptbahnhof ein, Sie fahren mit dem Transrapid in zehn Minuten an den Flughafen in ... an den Flughafen Franz Josef Strauß. Dann starten Sie praktisch hier am Hauptbahnhof in München. Das bedeutet natürlich, dass der Hauptbahnhof im Grunde genommen näher an Bayern ... an die bayerischen Städte heranwächst, weil das ja klar ist, weil auf dem Hauptbahnhof viele Linien aus Bayern zusammenlaufen.' 
AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; + +-- non-const values +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (str String) ENGINE=MergeTree ORDER BY str; +INSERT INTO tab VALUES ('abc') ('aäoöuü') ('München'); +SELECT str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original FROM tab; +DROP TABLE tab; From b07cde5ebb1cd2f054bd06462759879a2fec4267 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 18 Dec 2023 05:35:38 +0000 Subject: [PATCH 070/137] use const reference instead for pointer to values Co-authored-by: Robert Schulze Signed-off-by: Duc Canh Le --- .../Serializations/SerializationEnum.cpp | 24 +++++++++---------- .../Serializations/SerializationEnum.h | 23 ++++++++++-------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index ee427bc1043..9b3a437e9cf 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -11,13 +11,13 @@ namespace DB template void SerializationEnum::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template void SerializationEnum::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeEscapedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + writeEscapedString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template @@ -30,14 +30,14 @@ void SerializationEnum::deserializeTextEscaped(IColumn & column, ReadBuffe /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. 
std::string field_name; readEscapedString(field_name, istr); - assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } template void SerializationEnum::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeQuotedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeQuotedString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template @@ -45,7 +45,7 @@ void SerializationEnum::deserializeTextQuoted(IColumn & column, ReadBuffer { std::string field_name; readQuotedStringWithSQLStyle(field_name, istr); - assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name))); + assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name))); } template @@ -61,20 +61,20 @@ void SerializationEnum::deserializeWholeText(IColumn & column, ReadBuffer { std::string field_name; readStringUntilEOF(field_name, istr); - assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } template void SerializationEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - writeJSONString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr, settings); + writeJSONString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr, settings); } template void SerializationEnum::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeXMLStringForTextElement(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + writeXMLStringForTextElement(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); } template @@ -86,14 +86,14 @@ void SerializationEnum::deserializeTextJSON(IColumn & column, ReadBuffer & { std::string field_name; readJSONString(field_name, istr); - assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name))); + assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name))); } } template void SerializationEnum::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeCSVString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]), ostr); + writeCSVString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]), ostr); } template @@ -105,7 +105,7 @@ void SerializationEnum::deserializeTextCSV(IColumn & column, ReadBuffer & { std::string field_name; readCSVString(field_name, istr, settings.csv); - assert_cast(column).getData().push_back(ref_enum_values->getValue(StringRef(field_name), true)); + assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } @@ -114,7 +114,7 @@ void SerializationEnum::serializeTextMarkdown( const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (settings.markdown.escape_special_characters) - writeMarkdownEscapedString(ref_enum_values->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + 
writeMarkdownEscapedString(ref_enum_values.getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); else serializeTextEscaped(column, row_num, ostr, settings); } diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index 8611617de06..03b134e59a6 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -16,15 +16,18 @@ public: using typename SerializationNumber::ColumnType; using Values = EnumValues::Values; - SerializationEnum() = delete; - /// To explicitly create a SerializationEnum from Values - explicit SerializationEnum(const Values & values_) : own_enum_values(values_), ref_enum_values(&own_enum_values.value()) { } - /// To create a SerializationEnum from an IDataType instance, will reuse EnumValues from the type - /// Motivation: some Enum type has many elements, and building EnumValues is not trivial - /// This constructor allow to create many SerializationEnum from same IDataType without rebuilding - /// EnumValues for every call, so it's useful to get default serialization. + // SerializationEnum can be constructed in two ways: + /// - Make a copy of the Enum name-to-type mapping. + /// - Only store a reference to an existing mapping. This is faster if the Enum has a lot of different values or if SerializationEnum is + /// constructed very frequently. Make sure that the pointed-to mapping has a longer lifespan than SerializationEnum! + + explicit SerializationEnum(const Values & values_) + : own_enum_values(values_), ref_enum_values(own_enum_values.value()) + { + } + explicit SerializationEnum(const std::shared_ptr> & enum_type) - : own_enum_type(enum_type), ref_enum_values(static_cast *>(enum_type.get())) + : own_enum_type(enum_type), ref_enum_values(*enum_type) { } @@ -47,12 +50,12 @@ public: { FieldType x; readText(x, istr); - return ref_enum_values->findByValue(x)->first; + return ref_enum_values.findByValue(x)->first; } std::optional> own_enum_values; std::shared_ptr> own_enum_type; - const EnumValues * ref_enum_values; + const EnumValues & ref_enum_values; }; } From 69e6e59e9616a24a208ff393ba420ee8fa03245d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Dec 2023 08:34:53 +0100 Subject: [PATCH 071/137] Add a test for #49708 --- src/Storages/StorageFile.cpp | 5 ----- src/TableFunctions/TableFunctionFile.cpp | 3 --- ...2946_parallel_replicas_distributed.reference | 1 + .../02946_parallel_replicas_distributed.sql | 17 +++++++++++++++++ 4 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02946_parallel_replicas_distributed.reference create mode 100644 tests/queries/0_stateless/02946_parallel_replicas_distributed.sql diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 47b915e3ed8..87cb3312544 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include #include @@ -26,8 +25,6 @@ #include #include -#include -#include #include #include #include @@ -39,7 +36,6 @@ #include #include #include -#include #include #include @@ -56,7 +52,6 @@ #include #include #include -#include #include #ifdef __clang__ diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 45a7378ca82..8a9dde374ec 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -2,17 +2,14 @@ #include #include -#include 
"Parsers/IAST_fwd.h" #include "registerTableFunctions.h" #include #include #include #include -#include #include #include #include -#include namespace DB diff --git a/tests/queries/0_stateless/02946_parallel_replicas_distributed.reference b/tests/queries/0_stateless/02946_parallel_replicas_distributed.reference new file mode 100644 index 00000000000..ea4483ec305 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_distributed.reference @@ -0,0 +1 @@ +100 4950 diff --git a/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql b/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql new file mode 100644 index 00000000000..6c7fbd0f752 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_distributed.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_d; + +CREATE TABLE test (id UInt64, date Date) +ENGINE = MergeTree +ORDER BY id +AS select *, '2023-12-25' from numbers(100); + +CREATE TABLE IF NOT EXISTS test_d as test +ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test); + +SELECT count(), sum(id) +FROM test_d +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0; + +DROP TABLE test_d; +DROP TABLE test; From 65728f14ce92731193280985460acf0bb6aa4f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 18 Dec 2023 10:40:02 +0100 Subject: [PATCH 072/137] Review error codes --- src/Common/ArrayCache.h | 4 ++-- src/Common/QueryProfiler.cpp | 2 +- src/Common/ThreadFuzzer.cpp | 2 +- src/Common/createHardLink.cpp | 2 +- src/IO/AsynchronousReadBufferFromFile.cpp | 6 +----- src/IO/ReadBufferFromFile.cpp | 6 +----- src/IO/WriteBufferFromFile.cpp | 3 ++- src/IO/WriteBufferFromFileDescriptor.cpp | 2 +- src/Server/KeeperTCPHandler.cpp | 2 +- 9 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index 3584ffd5ad4..47b91ff4eef 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -179,13 +179,13 @@ private: { ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) - throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}.", ReadableSize(size)); + throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size)); } ~Chunk() { if (ptr && 0 != munmap(ptr, size)) - throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot munmap {}.", ReadableSize(size)); + throw ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size)); } Chunk(Chunk && other) noexcept : ptr(other.ptr), size(other.size) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 7398c62a882..16c8d4e223f 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -244,7 +244,7 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to add signal to mask for query profiler"); if (sigaction(pause_signal, &sa, nullptr)) - throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to setup signal handler for query profiler"); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Failed to setup signal handler for query profiler"); try { diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 50e2d81047d..0868613d880 
100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -269,7 +269,7 @@ void ThreadFuzzer::setup() const #endif if (sigaction(SIGPROF, &sa, nullptr)) - throw ErrnoException(ErrorCodes::CANNOT_MANIPULATE_SIGSET, "Failed to setup signal handler for thread fuzzer"); + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Failed to setup signal handler for thread fuzzer"); static constexpr UInt32 timer_precision = 1000000; diff --git a/src/Common/createHardLink.cpp b/src/Common/createHardLink.cpp index dcaf5e0fb10..f8a9dfa97c1 100644 --- a/src/Common/createHardLink.cpp +++ b/src/Common/createHardLink.cpp @@ -33,7 +33,7 @@ void createHardLink(const String & source_path, const String & destination_path) if (source_descr.st_ino != destination_descr.st_ino) ErrnoException::throwFromPathWithErrno( - ErrorCodes::CANNOT_STAT, + ErrorCodes::CANNOT_LINK, destination_path, link_errno, "Destination file {} already exists and has a different inode", diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index 3c2c923ee46..c6fe16a7f14 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -52,11 +52,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - ErrnoException::throwFromPath( - errno == ENOENT ? ErrorCodes::CANNOT_OPEN_FILE : ErrorCodes::CANNOT_OPEN_FILE, - file_name, - "Cannot set F_NOCACHE on file {}", - file_name); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot set F_NOCACHE on file {}", file_name); } #endif } diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp index 7f98c6dddfa..cb987171bad 100644 --- a/src/IO/ReadBufferFromFile.cpp +++ b/src/IO/ReadBufferFromFile.cpp @@ -51,11 +51,7 @@ ReadBufferFromFile::ReadBufferFromFile( if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - ErrnoException::throwFromPath( - errno == ENOENT ? ErrorCodes::CANNOT_OPEN_FILE : ErrorCodes::CANNOT_OPEN_FILE, - file_name, - "Cannot set F_NOCACHE on file {}", - file_name); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot set F_NOCACHE on file {}", file_name); } #endif } diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index e61f22ba3e7..0ca6c26f08c 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -46,7 +46,8 @@ WriteBufferFromFile::WriteBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC : flags | O_CLOEXEC, mode); if (-1 == fd) - ErrnoException::throwFromPath(ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); + ErrnoException::throwFromPath( + errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, file_name, "Cannot open file {}", file_name); #ifdef OS_DARWIN if (o_direct) diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index ff04bfdd906..813ef0deab9 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -137,7 +137,7 @@ void WriteBufferFromFileDescriptor::sync() ProfileEvents::increment(ProfileEvents::FileSyncElapsedMicroseconds, watch.elapsedMicroseconds()); if (-1 == res) - ErrnoException::throwFromPath(ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR, getFileName(), "Cannot fsync {}", getFileName()); + ErrnoException::throwFromPath(ErrorCodes::CANNOT_FSYNC, getFileName(), "Cannot fsync {}", getFileName()); } diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 764ebcdf0b9..76b84f0ce6e 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -92,7 +92,7 @@ struct SocketInterruptablePollWrapper int err = ::close(epollfd); chassert(!err || errno == EINTR); - throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot epoll_create"); + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot insert socket into epoll queue"); } pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI; pipe_event.data.fd = pipe.fds_rw[0]; From 403d9c809d0158a581260a460f22bcc92b8e122b Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 18 Dec 2023 10:41:12 +0100 Subject: [PATCH 073/137] fix test for postgresql addresses_expr --- .../configs/named_collections.xml | 10 +++++++- .../test_storage_postgresql/test.py | 24 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_postgresql/configs/named_collections.xml b/tests/integration/test_storage_postgresql/configs/named_collections.xml index ebe9f7ce9ce..4923c21d0a6 100644 --- a/tests/integration/test_storage_postgresql/configs/named_collections.xml +++ b/tests/integration/test_storage_postgresql/configs/named_collections.xml @@ -16,7 +16,8 @@ postgres mysecretpassword - postgres1:1111 + postgres1 + 1111 postgres test_table
@@ -28,5 +29,12 @@
             <database>postgres</database>
             <table>test_replicas</table>
+        <postgres5>
+            <user>postgres</user>
+            <password>mysecretpassword</password>
+            <addresses_expr>postgres1:5432</addresses_expr>
+            <database>postgres</database>
+            <table>test_table</table>
+        </postgres5>
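Editorial aside (not part of the patch): a minimal SQL sketch of what the integration test below exercises through the Python client, assuming the `postgres5` collection's `addresses_expr` value (`postgres1:5432`) is resolved by the `postgresql` table function into the target host and port.

``` sql
-- Illustrative sketch only, run from a ClickHouse client against the test setup above.
-- Write into the remote test_table(a integer, b text, c integer) via the named collection.
INSERT INTO TABLE FUNCTION postgresql(postgres5)
    SELECT number, concat('name_', toString(number)), 3 FROM numbers(10000);

-- Read back through the same collection; the Python test asserts 10000 rows and sum(c) = 30000.
SELECT count(), sum(c) FROM postgresql(postgres5);
```

The test_postgres_addresses_expr case added below performs the equivalent insert and aggregations and checks the expected counts.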
diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 11729a5ab18..f4108c80526 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -82,6 +82,30 @@ def test_postgres_select_insert(started_cluster): cursor.execute(f"DROP TABLE {table_name} ") +def test_postgres_addresses_expr(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + table_name = "test_table" + table = f"""postgresql(`postgres5`)""" + cursor.execute(f"DROP TABLE IF EXISTS {table_name}") + cursor.execute(f"CREATE TABLE {table_name} (a integer, b text, c integer)") + + node1.query( + f""" + INSERT INTO TABLE FUNCTION {table} + SELECT number, concat('name_', toString(number)), 3 from numbers(10000)""" + ) + check1 = f"SELECT count() FROM {table}" + check2 = f"SELECT Sum(c) FROM {table}" + check3 = f"SELECT count(c) FROM {table} WHERE a % 2 == 0" + check4 = f"SELECT count() FROM {table} WHERE b LIKE concat('name_', toString(1))" + assert (node1.query(check1)).rstrip() == "10000" + assert (node1.query(check2)).rstrip() == "30000" + assert (node1.query(check3)).rstrip() == "5000" + assert (node1.query(check4)).rstrip() == "1" + + cursor.execute(f"DROP TABLE {table_name} ") + + def test_postgres_conversions(started_cluster): cursor = started_cluster.postgres_conn.cursor() cursor.execute(f"DROP TABLE IF EXISTS test_types") From 44f1644622a126ebafc1473f18dfefac00d9fd97 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:01:32 +0100 Subject: [PATCH 074/137] Update 02944_dynamically_change_filesystem_cache_size.reference --- ..._dynamically_change_filesystem_cache_size.reference | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 7fa32ec2b09..cd155b6ca29 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 98 10 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +100 10 10 10 0 0 98 10 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/caches/s3_cache_02944/ 5 5000 0 1 +100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 10 98 From 29d70c12f63fcdae8991c97cf3fb83987082cecf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 18 Dec 2023 10:47:40 +0000 Subject: [PATCH 075/137] Fix CI checks --- src/Functions/FunctionSqid.cpp | 2 +- src/Functions/punycode.cpp | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 3 files changed, 5 
insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/FunctionSqid.cpp index 546263914c2..4517bba963e 100644 --- a/src/Functions/FunctionSqid.cpp +++ b/src/Functions/FunctionSqid.cpp @@ -1,6 +1,6 @@ #include "config.h" -#ifdef ENABLE_SQIDS +#if USE_SQIDS #include #include diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 5279c1d7312..e90cba82b1f 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -1,6 +1,6 @@ #include "config.h" -#ifdef USE_IDNA +#if USE_IDNA #include #include diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 637ab0ce6d4..d863d0e865c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -711,6 +711,7 @@ Promtail Protobuf ProtobufSingle ProxySQL +Punycode PyArrow PyCharm QEMU @@ -2069,6 +2070,8 @@ pseudorandom pseudorandomize psql ptrs +punycodeDecode +punycodeEncode pushdown pwrite py From fdda0cf8ac3f94e8c1b8f7b28a0f7bf172938ba3 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Mon, 18 Dec 2023 21:43:07 +0800 Subject: [PATCH 076/137] Literal aliases incorrectly classified as 'complex_aliases'. --- .../RequiredSourceColumnsVisitor.cpp | 3 ++- ..._literal_alias_misclassification.reference | 2 ++ .../02946_literal_alias_misclassification.sql | 24 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02946_literal_alias_misclassification.reference create mode 100644 tests/queries/0_stateless/02946_literal_alias_misclassification.sql diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 1bcec02f0c0..c07d783788a 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -126,7 +127,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS if (const auto * identifier = node->as()) data.addColumnIdentifier(*identifier); - else + else if (!node->as()) data.addColumnAliasIfAny(*node); } diff --git a/tests/queries/0_stateless/02946_literal_alias_misclassification.reference b/tests/queries/0_stateless/02946_literal_alias_misclassification.reference new file mode 100644 index 00000000000..d8e5a437352 --- /dev/null +++ b/tests/queries/0_stateless/02946_literal_alias_misclassification.reference @@ -0,0 +1,2 @@ +const 1 +const 2 diff --git a/tests/queries/0_stateless/02946_literal_alias_misclassification.sql b/tests/queries/0_stateless/02946_literal_alias_misclassification.sql new file mode 100644 index 00000000000..0d001bf1e4c --- /dev/null +++ b/tests/queries/0_stateless/02946_literal_alias_misclassification.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS literal_alias_misclassification; + +CREATE TABLE literal_alias_misclassification +( + `id` Int64, + `a` Nullable(String), + `b` Nullable(Int64) +) +ENGINE = MergeTree +ORDER BY id; + + +INSERT INTO literal_alias_misclassification values(1, 'a', 1); +INSERT INTO literal_alias_misclassification values(2, 'b', 2); + +SELECT 'const' AS r, b +FROM + ( SELECT a AS r, b FROM literal_alias_misclassification ) AS t1 + LEFT JOIN + ( SELECT a AS r FROM literal_alias_misclassification ) AS t2 + ON t1.r = t2.r +ORDER BY b; + +DROP TABLE IF EXISTS literal_alias_misclassification; From 028d1c9478e1dc264907edaf2616d776c68bf8f4 Mon Sep 
17 00:00:00 2001 From: Han Fei Date: Mon, 18 Dec 2023 14:47:54 +0100 Subject: [PATCH 077/137] refine error message --- src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8445c513372..d7b444f7763 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4063,10 +4063,10 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT Strings restored; Strings error_parts; - auto is_appropriate_state = [] (DataPartState state) + auto is_appropriate_state = [] (const DataPartPtr & part_) { - if (state != DataPartState::Outdated) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part from unexpected state: {}", state); + if (part_->getState() != DataPartState::Outdated) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part {} from unexpected state: {}", part_->name, part_->getState()); return true; }; @@ -4111,7 +4111,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT for (const auto & part_candidate_in_partition : parts_candidates | std::views::reverse) { if (part->info.contains(part_candidate_in_partition->info) - && is_appropriate_state(part_candidate_in_partition->getState())) + && is_appropriate_state(part_candidate_in_partition)) { String out_reason; /// Outdated parts can itersect legally (because of DROP_PART) here it's okay, we From 99d1659b4c813c3afbae015de928aed715666602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 18 Dec 2023 15:28:54 +0100 Subject: [PATCH 078/137] Replace rust's BLAKE3 with llvm's implementation --- rust/BLAKE3/CMakeLists.txt | 3 - rust/BLAKE3/Cargo.toml | 20 - rust/BLAKE3/include/blake3.h | 15 - rust/BLAKE3/src/lib.rs | 31 -- rust/CMakeLists.txt | 1 - rust/Cargo.lock | 683 +++++++++++++++---------------- rust/Cargo.toml | 1 - src/Functions/FunctionsHashing.h | 16 +- src/configure_config.cmake | 4 +- 9 files changed, 346 insertions(+), 428 deletions(-) delete mode 100644 rust/BLAKE3/CMakeLists.txt delete mode 100644 rust/BLAKE3/Cargo.toml delete mode 100644 rust/BLAKE3/include/blake3.h delete mode 100644 rust/BLAKE3/src/lib.rs diff --git a/rust/BLAKE3/CMakeLists.txt b/rust/BLAKE3/CMakeLists.txt deleted file mode 100644 index ceb0a647b66..00000000000 --- a/rust/BLAKE3/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -clickhouse_import_crate(MANIFEST_PATH Cargo.toml) -target_include_directories(_ch_rust_blake3 INTERFACE include) -add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) diff --git a/rust/BLAKE3/Cargo.toml b/rust/BLAKE3/Cargo.toml deleted file mode 100644 index ed414fa54c1..00000000000 --- a/rust/BLAKE3/Cargo.toml +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "_ch_rust_blake3" -version = "0.1.0" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -blake3 = "1.2.0" -libc = "0.2.132" - -[lib] -crate-type = ["staticlib"] - -[profile.release] -debug = true - -[profile.release-thinlto] -inherits = "release" -# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit -lto = true diff --git a/rust/BLAKE3/include/blake3.h b/rust/BLAKE3/include/blake3.h deleted file mode 100644 index 5dc7d5bd902..00000000000 --- a/rust/BLAKE3/include/blake3.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BLAKE3_H -#define BLAKE3_H - -#include - - -extern "C" { - -char 
*blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data); - -void blake3_free_char_pointer(char *ptr_to_free); - -} // extern "C" - -#endif /* BLAKE3_H */ diff --git a/rust/BLAKE3/src/lib.rs b/rust/BLAKE3/src/lib.rs deleted file mode 100644 index 7a3be8a2ae7..00000000000 --- a/rust/BLAKE3/src/lib.rs +++ /dev/null @@ -1,31 +0,0 @@ -extern crate blake3; -extern crate libc; - -use std::ffi::{CString}; -use std::slice; -use std::os::raw::c_char; - -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let input_res = slice::from_raw_parts(begin as *const u8, size as usize); - let mut hasher = blake3::Hasher::new(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - - reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); - std::ptr::null_mut() -} - -// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw -#[no_mangle] -pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) { - std::mem::drop(CString::from_raw(ptr_to_free)); -} diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 5ea806baa3b..66694ee16f8 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -99,6 +99,5 @@ function(add_rust_subdirectory src) VERBATIM) endfunction() -add_rust_subdirectory (BLAKE3) add_rust_subdirectory (skim) add_rust_subdirectory (prql) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 04569cd3b3a..86bbec5579f 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2,14 +2,6 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - [[package]] name = "_ch_rust_prql" version = "0.1.0" @@ -30,9 +22,9 @@ dependencies = [ [[package]] name = "addr2line" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] @@ -45,24 +37,31 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.6" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ - "getrandom", + "cfg-if", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -95,43 +94,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" dependencies = [ "backtrace", ] @@ -146,12 +145,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - [[package]] name = 
"arrayvec" version = "0.7.4" @@ -166,9 +159,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", "cc", @@ -193,44 +186,24 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" - -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -240,24 +213,23 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.48.5", ] [[package]] name = "chumsky" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" dependencies = [ "hashbrown", "stacker", @@ -279,17 +251,11 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = 
"06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crossbeam" @@ -307,9 +273,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5" dependencies = [ "cfg-if", "crossbeam-utils", @@ -317,9 +283,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -328,22 +294,21 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset 0.9.0", - "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "b9bcf5bdbfdd6030fb4a1c497b5d5fc5921aa2f60d359a17e249c0e6df3de153" dependencies = [ "cfg-if", "crossbeam-utils", @@ -351,28 +316,18 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f" dependencies = [ "cfg-if", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -382,18 +337,18 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] [[package]] name = "cxx" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d" +checksum = "e9fc0c733f71e58dedf4f034cd2a266f80b94cc9ed512729e1798651b68c2cba" dependencies = [ "cc", "cxxbridge-flags", @@ -403,9 +358,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397" +checksum = 
"51bc81d2664db24cf1d35405f66e18a85cffd4d49ab930c71a5c6342a410f38c" dependencies = [ "cc", "codespan-reporting", @@ -413,24 +368,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "cxxbridge-flags" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0" +checksum = "8511afbe34ea242697784da5cb2c5d4a0afb224ca8b136bdf93bfe180cbe5884" [[package]] name = "cxxbridge-macro" -version = "1.0.102" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c" +checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -478,6 +433,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "deranged" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.11.2" @@ -509,17 +473,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -556,28 +509,17 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "errno" -version = "0.3.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -595,40 +537,31 @@ dependencies = [ "thread_local", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "gimli" -version = "0.27.3" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", + "allocator-api2", ] [[package]] @@ -639,22 +572,22 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -680,16 +613,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", "rustix", - "windows-sys", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", + "windows-sys 0.48.0", ] [[package]] @@ -702,16 +626,25 @@ dependencies = [ ] [[package]] -name = "itoa" -version = "1.0.9" +name = "itertools" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" dependencies = [ "wasm-bindgen", ] @@ -724,9 +657,20 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.1", + "libc", + "redox_syscall", +] [[package]] name = "link-cplusplus" @@ -739,21 +683,21 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memoffset" @@ -825,37 +769,27 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" -version = "0.31.1" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "pin-utils" @@ -864,19 +798,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "proc-macro2" -version = "1.0.66" +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] [[package]] name = "prql-ast" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71194e75f14dbe7debdf2b5eca0812c978021a1bd23d6fe1da98b58e407e035a" +checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a" dependencies = [ "enum-as-inner", "semver", @@ -886,9 +826,9 @@ dependencies = [ [[package]] name = "prql-compiler" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff28e838b1be4227cc567a75c11caa3be25c5015f0e5fd21279c06e944ba44f" +checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c" dependencies = [ "anstream", "anyhow", @@ -912,9 +852,9 @@ dependencies = [ [[package]] name = "prql-parser" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3182e2ef0465a960eb02519b18768e39123d3c3a0037a2d2934055a3ef901870" +checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1" dependencies = [ "chumsky", "itertools 0.11.0", @@ -933,18 
+873,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.31" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -952,41 +892,39 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "redox_users" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" dependencies = [ "getrandom", - "redox_syscall", + "libredox", "thiserror", ] [[package]] name = "regex" -version = "1.9.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", @@ -996,9 +934,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", @@ -1007,9 +945,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustc-demangle" @@ -1019,15 +957,15 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.38.6" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1038,15 +976,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = 
"ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "scratch" @@ -1056,38 +988,38 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] name = "semver" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.174" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -1112,7 +1044,7 @@ dependencies = [ "nix 0.25.1", "rayon", "regex", - "time 0.3.23", + "time", "timer", "tuikit", "unicode-width", @@ -1121,20 +1053,20 @@ dependencies = [ [[package]] name = "sqlformat" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" dependencies = [ - "itertools 0.10.5", + "itertools 0.12.0", "nom", "unicode_categories", ] [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" dependencies = [ "log", "serde", @@ -1170,23 +1102,17 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.1" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.27", + "syn 2.0.41", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "1.0.109" @@ -1200,9 +1126,9 @@ dependencies = [ 
[[package]] name = "syn" -version = "2.0.27" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -1222,31 +1148,31 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" dependencies = [ "winapi-util", ] [[package]] name = "thiserror" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.44" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", ] [[package]] @@ -1261,30 +1187,21 @@ dependencies = [ [[package]] name = "time" -version = "0.1.45" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ + "deranged", + "powerfmt", "serde", "time-core", ] [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "timer" @@ -1309,23 +1226,17 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unicode_categories" @@ -1366,12 +1277,6 @@ dependencies = [ "quote", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1380,9 +1285,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1390,24 +1295,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1415,22 +1320,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.27", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "winapi" @@ -1450,9 +1355,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1464,12 +1369,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1478,68 +1383,154 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 2a2b582cea8..ac8b31a7290 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,7 +1,6 @@ # workspace is required to vendor crates for all packages. 
[workspace] members = [ - "BLAKE3", "skim", "prql", ] diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 9468bc259e3..7dfd910bea4 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -16,7 +16,7 @@ #include #if USE_BLAKE3 -# include +# include #endif #include @@ -833,13 +833,13 @@ struct ImplBLAKE3 #else static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { - auto err_msg = blake3_apply_shim(begin, safe_cast(size), out_char_data); - if (err_msg != nullptr) - { - auto err_st = std::string(err_msg); - blake3_free_char_pointer(err_msg); - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st); - } + static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); + auto & result = *reinterpret_cast *>(out_char_data); + + llvm::BLAKE3 hasher; + if (size > 0) + hasher.update(llvm::StringRef(begin, size)); + hasher.final(result); } #endif }; diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c3c6d9be6da..871a5ff6644 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -19,9 +19,6 @@ endif() if (TARGET ch_contrib::rdkafka) set(USE_RDKAFKA 1) endif() -if (TARGET ch_rust::blake3) - set(USE_BLAKE3 1) -endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() @@ -103,6 +100,7 @@ endif() if (TARGET ch_contrib::llvm) set(USE_EMBEDDED_COMPILER ${ENABLE_EMBEDDED_COMPILER}) set(USE_DWARF_PARSER ${ENABLE_DWARF_PARSER}) + set(USE_BLAKE3 1) endif() if (TARGET ch_contrib::unixodbc) set(USE_ODBC 1) From 7ebb7d29553529e4f190b6c04fa37d3fa28168d2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 18 Dec 2023 15:13:43 +0000 Subject: [PATCH 079/137] better trivial count optimization for storage Merge --- src/Storages/StorageMerge.cpp | 41 ++++++++++++++++------------------- src/Storages/StorageMerge.h | 7 ++++-- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 1bf585771ff..36c92129177 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1295,38 +1295,35 @@ std::tuple StorageMerge::evaluateDatabaseName(cons bool StorageMerge::supportsTrivialCountOptimization() const { - bool supported = true; - forEachTable([&](const auto & table) - { - supported &= table->supportsTrivialCountOptimization(); - }); - return supported; + return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(); }) == nullptr; } std::optional StorageMerge::totalRows(const Settings & settings) const { - UInt64 total_rows = 0; - forEachTable([&](const auto & table) - { - std::optional rows = table->totalRows(settings); - if (rows) - total_rows += *rows; - }); - return {total_rows}; + return totalRowsOrBytes([&](const auto & table) { return table->totalRows(settings); }); } std::optional StorageMerge::totalBytes(const Settings & settings) const { - UInt64 total_bytes = 0; - forEachTable([&](const auto & table) - { - std::optional bytes = table->totalBytes(settings); - if (bytes) - total_bytes += *bytes; - }); - return {total_bytes}; + return totalRowsOrBytes([&](const auto & table) { return table->totalBytes(settings); }); } +template +std::optional StorageMerge::totalRowsOrBytes(F && func) const +{ + UInt64 total_rows_or_bytes = 0; + auto first_table = getFirstTable([&](const auto & table) + { + if (auto rows_or_bytes = func(table)) + { + total_rows_or_bytes += *rows_or_bytes; + return false; + } + return true; + 
}); + + return first_table ? std::nullopt : std::make_optional(total_rows_or_bytes); +} void registerStorageMerge(StorageFactory & factory) { diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 762dc71ab83..2455eb678bb 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -79,8 +79,8 @@ public: bool supportsTrivialCountOptimization() const override; - std::optional totalRows(const Settings &) const override; - std::optional totalBytes(const Settings &) const override; + std::optional totalRows(const Settings & settings) const override; + std::optional totalBytes(const Settings & settings) const override; private: std::optional source_database_regexp; @@ -118,6 +118,9 @@ private: bool tableSupportsPrewhere() const; + template + std::optional totalRowsOrBytes(F && func) const; + friend class ReadFromMerge; }; From 1a66dd94fa65986037d40ad63216a2de551db812 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 18 Dec 2023 15:37:02 +0000 Subject: [PATCH 080/137] init --- docs/en/sql-reference/functions/date-time-functions.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 565486275e6..0261589b968 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1809,6 +1809,8 @@ Alias: `dateTrunc`. - `quarter` - `year` + `unit` argument is case-insensitive. + - `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). 
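
For reference, a minimal sketch of the behaviour documented in the hunk above, assuming a ClickHouse server where `date_trunc` already treats the `unit` argument case-insensitively (the queries and timestamp below are illustrative, not part of the patch):

```sql
-- Per the documentation change above, both spellings of the unit are expected
-- to produce the same result: the timestamp truncated to the start of December 2023.
SELECT date_trunc('month', toDateTime('2023-12-18 17:00:00'));
SELECT date_trunc('MONTH', toDateTime('2023-12-18 17:00:00'));
```
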
From b9408125cc91ecdc61a2dc09785e10fe61d62bd6 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 18 Dec 2023 17:06:10 +0100 Subject: [PATCH 081/137] CI for docs only fix (#57992) * run only docs related jobs if changes are in docs only (fix after applying CI with digest) --- .github/workflows/backport_branches.yml | 4 +++- .github/workflows/master.yml | 2 ++ .github/workflows/nightly.yml | 2 ++ .github/workflows/pull_request.yml | 7 ------ .github/workflows/release_branches.yml | 3 +++ tests/ci/ci.py | 29 +++++++++++++++++-------- tests/ci/pr_info.py | 20 +++++++++++++++++ 7 files changed, 50 insertions(+), 17 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index ceb62083f78..c3e74390646 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -19,6 +19,8 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -155,7 +157,7 @@ jobs: ##################################### BUILD REPORTER ####################################### ############################################################################################ BuilderReport: - if: ${{ success() || failure() }} + if: ${{ !failure() && !cancelled() }} needs: - RunConfig - BuilderDebAarch64 diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index fcfe2106e97..432a9df5369 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -19,6 +19,8 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 458f59dfd38..2774eae24cc 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -22,6 +22,8 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: PrepareRunConfig id: runconfig run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9f4636f73a4..08a4ab99520 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,13 +13,6 @@ on: # yamllint disable-line rule:truthy - opened branches: - master - paths-ignore: - - '**.md' - - 'docker/docs/**' - - 'docs/**' - - 'utils/check-style/aspell-ignore/**' - - 'tests/ci/docs_check.py' - - '.github/workflows/docs_check.yml' ########################################################################################## ##################################### SMALL CHECKS ####################################### ########################################################################################## diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4fecc5acf58..fa8e93369b3 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -22,6 +22,8 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -214,6 +216,7 @@ jobs: run_command: | python3 build_report_check.py "$CHECK_NAME" MarkReleaseReady: + if: ${{ !failure() && !cancelled() }} needs: - 
BuilderBinDarwin - BuilderBinDarwinAarch64 diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3e135d7ffa7..f2293b8338b 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -86,7 +86,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--mark-success", action="store_true", - help="Action that marks job provided in --job-name (with batch provided in --batch) as successfull", + help="Action that marks job provided in --job-name (with batch provided in --batch) as successful", ) parser.add_argument( "--job-name", @@ -111,13 +111,13 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: default="", type=str, required=False, - help="otput file to write json result to, if not set - stdout", + help="output file to write json result to, if not set - stdout", ) parser.add_argument( "--pretty", action="store_true", default=False, - help="makes json output pretty formated", + help="makes json output pretty formatted", ) parser.add_argument( "--skip-docker", @@ -259,6 +259,15 @@ def _check_and_update_for_early_style_check(run_config: dict) -> None: jobs_to_do[index] = "Style check early" +def _update_config_for_docs_only(run_config: dict) -> None: + DOCS_CHECK_JOBS = ["Docs check", "Style check"] + print(f"NOTE: Will keep only docs related jobs: [{DOCS_CHECK_JOBS}]") + jobs_to_do = run_config.get("jobs_data", {}).get("jobs_to_do", []) + run_config["jobs_data"]["jobs_to_do"] = [ + job for job in jobs_to_do if job in DOCS_CHECK_JOBS + ] + + def _configure_docker_jobs( rebuild_all_dockers: bool, docker_digest_or_latest: bool = False ) -> Dict: @@ -363,12 +372,12 @@ def _configure_jobs( batches_to_do: List[int] = [] if job_config.run_by_label: - # this job controled by label, add to todo if it's labe is set in pr + # this job controlled by label, add to todo if it's labe is set in pr if job_config.run_by_label in pr_labels: for batch in range(num_batches): # type: ignore batches_to_do.append(batch) else: - # this job controled by digest, add to todo if it's not successfully done before + # this job controlled by digest, add to todo if it's not successfully done before for batch in range(num_batches): # type: ignore success_flag_name = get_file_flag_name(job, digest, batch, num_batches) if success_flag_name not in done_files or ( @@ -417,7 +426,7 @@ def _configure_jobs( def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: - # This action is required to re-create all GH statuses for skiped jobs, so that ci report can be generated afterwards + # This action is required to re-create all GH statuses for skipped jobs, so that ci report can be generated afterwards temp_path = Path(TEMP_PATH) if not temp_path.exists(): temp_path.mkdir(parents=True, exist_ok=True) @@ -588,8 +597,10 @@ def main() -> int: result["docs"] = docs_digest result["jobs_data"] = jobs_data result["docker_data"] = docker_data - if not args.docker_digest_or_latest: + if pr_info.number != 0 and not args.docker_digest_or_latest: _check_and_update_for_early_style_check(result) + if pr_info.number != 0 and pr_info.has_changes_in_documentation_only(): + _update_config_for_docs_only(result) elif args.update_gh_statuses: assert indata, "Run config must be provided via --infile" @@ -614,7 +625,7 @@ def main() -> int: f"Pre action done. Report files [{files}] have been downloaded from [{path}] to [{report_path}]" ) else: - print("Pre action done. Nothing to do for [{args.job_name}]") + print(f"Pre action done. 
Nothing to do for [{args.job_name}]") elif args.run: assert CI_CONFIG.get_job_config( @@ -684,7 +695,7 @@ def main() -> int: ) else: if not CommitStatusData.is_present(): - # apperently exit after rerun-helper check + # apparently exit after rerun-helper check # do nothing, exit without failure print("ERROR: no status file for job [{job}]") job_status = CommitStatusData( diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 8e7da53453f..c023ca048d6 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -311,6 +311,26 @@ class PRInfo: return True return False + def has_changes_in_documentation_only(self) -> bool: + """ + checks if changes are docs related without other changes + FIXME: avoid hardcoding filenames here + """ + if not self.changed_files: + return False + + for f in self.changed_files: + _, ext = os.path.splitext(f) + path_in_docs = f.startswith("docs/") + if not ( + (ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs) + or "docker/docs" in f + or "docs_check.py" in f + or ext == ".md" + ): + return False + return True + def has_changes_in_submodules(self): if self.changed_files is None or not self.changed_files: return True From 566aeaa17aaf247f973fb4516ba7c46c3460624e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 18 Dec 2023 17:23:41 +0100 Subject: [PATCH 082/137] Better config --- tests/config/config.d/storage_conf.xml | 11 --------- tests/config/config.d/storage_conf_02944.xml | 25 ++++++++++++++++++++ tests/config/install.sh | 1 + 3 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 tests/config/config.d/storage_conf_02944.xml diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 7a5caad9139..18652826d83 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -29,17 +29,6 @@ 0 0 - - cache - s3_disk - s3_cache_02944/ - 100 - 100 - 10 - 10 - 100 - 0 - local_blob_storage diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml new file mode 100644 index 00000000000..5f45640a923 --- /dev/null +++ b/tests/config/config.d/storage_conf_02944.xml @@ -0,0 +1,25 @@ + + + + + s3 + s3_disk/ + http://localhost:11111/test/test_02944/ + clickhouse + clickhouse + 20000 + + + cache + s3_disk + s3_cache_02944/ + 100 + 10 + 10 + 10 + 100 + 0 + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 6046f05c922..2f9fd44c9b0 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -156,6 +156,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then fi ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi From 528df04b870c61d37b3438528c12e2cd5f34c4ac Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 18 Dec 2023 17:54:27 +0100 Subject: [PATCH 083/137] add needs_changed_files flag for pr_info (#58003) --- tests/ci/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f2293b8338b..bed12d54fe3 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -539,7 +539,7 @@ def main() -> int: if args.configure: GR = GitRunner() - pr_info = PRInfo() + pr_info = PRInfo(need_changed_files=True) docker_data = {} git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") From e357c088a0716270c8bcd5e7f0cb055296372f7d Mon Sep 17 00:00:00 2001 From: 
Christoph Wurm Date: Mon, 18 Dec 2023 09:49:12 +0000 Subject: [PATCH 084/137] Update settings.md: allow_experimental_parallel_reading_from_replicas Document all possible values for `allow_experimental_parallel_reading_from_replicas`. --- docs/en/operations/settings/settings.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5c509058cbb..dc3baf09d00 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1578,9 +1578,15 @@ Default value: `default`. ## allow_experimental_parallel_reading_from_replicas -If true, ClickHouse will send a SELECT query to all replicas of a table (up to `max_parallel_replicas`) . It will work for any kind of MergeTree table. +Enables or disables sending SELECT queries to all replicas of a table (up to `max_parallel_replicas`). Reading is parallelized and coordinated dynamically. It will work for any kind of MergeTree table. -Default value: `false`. +Possible values: + +- 0 - Disabled. +- 1 - Enabled, silently disabled in case of failure. +- 2 - Enabled, throws an exception in case of failure. + +Default value: `0`. ## compile_expressions {#compile-expressions} From a12fe7473cfd500d093f033d0133ca294a5f870f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 18 Dec 2023 18:08:57 +0100 Subject: [PATCH 085/137] add killed by timeout message --- docker/test/stateful/run.sh | 4 ++-- docker/test/stateless/run.sh | 4 ++-- docker/test/stateless/utils.lib | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 806b57c4616..82587efcb3d 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -78,7 +78,7 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout 120 service clickhouse-server start + timeout_with_logging 120 service clickhouse-server start sleep 0.5 counter=$((counter + 1)) done @@ -163,7 +163,7 @@ function run_tests() } export -f run_tests -timeout "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index bfa9f9938ab..bd44d1724ae 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -216,11 +216,11 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
- timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 1204434d853..a30e05b46ff 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -35,4 +35,19 @@ function fn_exists() { declare -F "$1" > /dev/null; } +function timeout_with_logging() { + local exit_code=0 + + timeout "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + # vi: ft=bash + +} From 80af5cb78a4bd557538dc3dc6268101ea1c56ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 18 Dec 2023 18:32:31 +0100 Subject: [PATCH 086/137] Always build the necessary LLVM modules if blake3 is enabled --- contrib/llvm-project-cmake/CMakeLists.txt | 120 ++++++++++++---------- src/Functions/FunctionsHashing.h | 4 +- src/configure_config.cmake | 2 +- 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 406bac73e90..d09060912d8 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -11,7 +11,9 @@ option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during quer option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT}) -if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) +option (ENABLE_BLAKE3 "Enable BLAKE3 function" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER AND NOT ENABLE_BLAKE3) message(STATUS "Not using LLVM") return() endif() @@ -26,61 +28,75 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") # and llvm cannot be compiled with bundled libcxx and 20 standard. set (CMAKE_CXX_STANDARD 14) -# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
-set (REQUIRED_LLVM_LIBRARIES - LLVMExecutionEngine - LLVMRuntimeDyld - LLVMAsmPrinter - LLVMDebugInfoDWARF - LLVMGlobalISel - LLVMSelectionDAG - LLVMMCDisassembler - LLVMPasses - LLVMCodeGen - LLVMipo - LLVMBitWriter - LLVMInstrumentation - LLVMScalarOpts - LLVMAggressiveInstCombine - LLVMInstCombine - LLVMVectorize - LLVMTransformUtils - LLVMTarget - LLVMAnalysis - LLVMProfileData - LLVMObject - LLVMBitReader - LLVMCore - LLVMRemarks - LLVMBitstreamReader - LLVMMCParser - LLVMMC - LLVMBinaryFormat - LLVMDebugInfoCodeView - LLVMSupport - LLVMDemangle -) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") +endif () + + +if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER) + # Only compiling blake3 + set (REQUIRED_LLVM_LIBRARIES LLVMSupport) +else() + # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. + set (REQUIRED_LLVM_LIBRARIES + LLVMExecutionEngine + LLVMRuntimeDyld + LLVMAsmPrinter + LLVMDebugInfoDWARF + LLVMGlobalISel + LLVMSelectionDAG + LLVMMCDisassembler + LLVMPasses + LLVMCodeGen + LLVMipo + LLVMBitWriter + LLVMInstrumentation + LLVMScalarOpts + LLVMAggressiveInstCombine + LLVMInstCombine + LLVMVectorize + LLVMTransformUtils + LLVMTarget + LLVMAnalysis + LLVMProfileData + LLVMObject + LLVMBitReader + LLVMCore + LLVMRemarks + LLVMBitstreamReader + LLVMMCParser + LLVMMC + LLVMBinaryFormat + LLVMDebugInfoCodeView + LLVMSupport + LLVMDemangle + ) + + if (ARCH_AMD64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) + elseif (ARCH_AARCH64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) + elseif (ARCH_PPC64LE) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) + elseif (ARCH_S390X) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) + elseif (ARCH_RISCV64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) + endif () +endif() + # Skip useless "install" instructions from CMake: set (LLVM_INSTALL_TOOLCHAIN_ONLY 1 CACHE INTERNAL "") -if (ARCH_AMD64) - set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -elseif (ARCH_AARCH64) - set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -elseif (ARCH_PPC64LE) - set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) -elseif (ARCH_S390X) - set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) -elseif (ARCH_RISCV64) - set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) -endif () - message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind diff --git a/src/Functions/FunctionsHashing.h 
b/src/Functions/FunctionsHashing.h index 7dfd910bea4..c83c3b7f41b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -825,10 +825,10 @@ struct ImplBLAKE3 static constexpr auto name = "BLAKE3"; enum { length = 32 }; -#if !USE_BLAKE3 +#ifndef USE_BLAKE3 [[noreturn]] static void apply(const char * /*begin*/, const size_t /*size*/, unsigned char * /*out_char_data*/) { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. Rust code or BLAKE3 itself may be disabled."); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available"); } #else static void apply(const char * begin, const size_t size, unsigned char* out_char_data) diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 871a5ff6644..9358abdf7f8 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -100,7 +100,7 @@ endif() if (TARGET ch_contrib::llvm) set(USE_EMBEDDED_COMPILER ${ENABLE_EMBEDDED_COMPILER}) set(USE_DWARF_PARSER ${ENABLE_DWARF_PARSER}) - set(USE_BLAKE3 1) + set(USE_BLAKE3 ${ENABLE_LIBRARIES}) endif() if (TARGET ch_contrib::unixodbc) set(USE_ODBC 1) From 0a45ffef3090a1245a0f9853646edd33c1bcd230 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 18 Dec 2023 19:14:45 +0100 Subject: [PATCH 087/137] Update 02944_dynamically_change_filesystem_cache_size.sh --- .../02944_dynamically_change_filesystem_cache_size.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 021493eaa82..e47e13a7e40 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" $CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=/etc/clickhouse-server/config.d/storage_conf_02944.xml config_path_tmp=$config_path.tmp echo 'set max_size from 100 to 10' From 5601f97e0c4ae09eea8a31a9cb8733f8c5e20d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 18 Dec 2023 19:14:47 +0100 Subject: [PATCH 088/137] Move FunctionsStringHashFixedString template to single file --- src/Functions/FunctionsHashing.h | 320 ++----------- src/Functions/FunctionsHashingMisc.cpp | 41 +- src/Functions/FunctionsHashingSSL.cpp | 177 ------- .../FunctionsStringHashFixedString.cpp | 441 ++++++++++++++++++ 4 files changed, 503 insertions(+), 476 deletions(-) delete mode 100644 src/Functions/FunctionsHashingSSL.cpp create mode 100644 src/Functions/FunctionsStringHashFixedString.cpp diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index c83c3b7f41b..8fb21cd1ad4 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -15,24 +15,13 @@ #endif #include -#if USE_BLAKE3 -# include -#endif - #include #include #include #include #if USE_SSL -# include # include -# include -#if USE_BORINGSSL -# include -#else -# include -#endif #endif #include @@ -191,6 +180,40 @@ T combineHashesFunc(T t1, T t2) } +struct SipHash64Impl +{ + static constexpr auto name = "sipHash64"; + using 
ReturnType = UInt64; + + static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct SipHash64KeyedImpl +{ + static constexpr auto name = "sipHash64Keyed"; + using ReturnType = UInt64; + using Key = impl::SipHashKey; + using KeyColumns = impl::SipHashKeyColumns; + + static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } + static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + + static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } + + static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) + { + transformEndianness(h1); + transformEndianness(h2); + const UInt64 hashes[]{h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); + } + + static constexpr bool use_int_hash_for_pods = false; +}; + #if USE_SSL struct HalfMD5Impl { @@ -225,159 +248,8 @@ struct HalfMD5Impl static constexpr bool use_int_hash_for_pods = false; }; - -struct MD4Impl -{ - static constexpr auto name = "MD4"; - enum { length = MD4_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD4_CTX ctx; - MD4_Init(&ctx); - MD4_Update(&ctx, reinterpret_cast(begin), size); - MD4_Final(out_char_data, &ctx); - } -}; - -struct MD5Impl -{ - static constexpr auto name = "MD5"; - enum { length = MD5_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD5_CTX ctx; - MD5_Init(&ctx); - MD5_Update(&ctx, reinterpret_cast(begin), size); - MD5_Final(out_char_data, &ctx); - } -}; - -struct SHA1Impl -{ - static constexpr auto name = "SHA1"; - enum { length = SHA_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA_CTX ctx; - SHA1_Init(&ctx); - SHA1_Update(&ctx, reinterpret_cast(begin), size); - SHA1_Final(out_char_data, &ctx); - } -}; - -struct SHA224Impl -{ - static constexpr auto name = "SHA224"; - enum { length = SHA224_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA224_Init(&ctx); - SHA224_Update(&ctx, reinterpret_cast(begin), size); - SHA224_Final(out_char_data, &ctx); - } -}; - -struct SHA256Impl -{ - static constexpr auto name = "SHA256"; - enum { length = SHA256_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, reinterpret_cast(begin), size); - SHA256_Final(out_char_data, &ctx); - } -}; - -struct SHA384Impl -{ - static constexpr auto name = "SHA384"; - enum { length = SHA384_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA384_Init(&ctx); - SHA384_Update(&ctx, reinterpret_cast(begin), size); - SHA384_Final(out_char_data, &ctx); - } -}; - -struct SHA512Impl -{ - static constexpr auto name = "SHA512"; - enum { length = 64 }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA512_Init(&ctx); - SHA512_Update(&ctx, reinterpret_cast(begin), size); - SHA512_Final(out_char_data, &ctx); - } -}; - -struct 
SHA512Impl256 -{ - static constexpr auto name = "SHA512_256"; - enum { length = 32 }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default - /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init, - /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available - /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. - auto md_ctx = EVP_MD_CTX_create(); - EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); - EVP_DigestUpdate(md_ctx, begin, size); - EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); - EVP_MD_CTX_destroy(md_ctx); - } -}; #endif -struct SipHash64Impl -{ - static constexpr auto name = "sipHash64"; - using ReturnType = UInt64; - - static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } - static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc(h1, h2); } - - static constexpr bool use_int_hash_for_pods = false; -}; - -struct SipHash64KeyedImpl -{ - static constexpr auto name = "sipHash64Keyed"; - using ReturnType = UInt64; - using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } - - static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } - - static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) - { - transformEndianness(h1); - transformEndianness(h2); - const UInt64 hashes[]{h1, h2}; - return applyKeyed(key, reinterpret_cast(hashes), sizeof(hashes)); - } - - static constexpr bool use_int_hash_for_pods = false; -}; - struct SipHash128Impl { static constexpr auto name = "sipHash128"; @@ -820,121 +692,6 @@ struct ImplXXH3 static constexpr bool use_int_hash_for_pods = false; }; -struct ImplBLAKE3 -{ - static constexpr auto name = "BLAKE3"; - enum { length = 32 }; - -#ifndef USE_BLAKE3 - [[noreturn]] static void apply(const char * /*begin*/, const size_t /*size*/, unsigned char * /*out_char_data*/) - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available"); - } -#else - static void apply(const char * begin, const size_t size, unsigned char* out_char_data) - { - static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); - auto & result = *reinterpret_cast *>(out_char_data); - - llvm::BLAKE3 hasher; - if (size > 0) - hasher.update(llvm::StringRef(begin, size)); - hasher.final(result); - } -#endif -}; - -template -class FunctionStringHashFixedString : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0]->getName(), getName()); - - return std::make_shared(Impl::length); - } - - bool 
useDefaultImplementationForConstants() const override { return true; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - - const typename ColumnString::Chars & data = col_from->getChars(); - const typename ColumnString::Offsets & offsets = col_from->getOffsets(); - auto & chars_to = col_to->getChars(); - const auto size = offsets.size(); - chars_to.resize(size * Impl::length); - - ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset - 1, - reinterpret_cast(&chars_to[i * Impl::length])); - - current_offset = offsets[i]; - } - - return col_to; - } - else if ( - const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); - const auto size = col_from_fix->size(); - auto & chars_to = col_to->getChars(); - const auto length = col_from_fix->getN(); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); - } - return col_to; - } - else if ( - const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) - { - auto col_to = ColumnFixedString::create(Impl::length); - const typename ColumnIPv6::Container & data = col_from_ip->getData(); - const auto size = col_from_ip->size(); - auto & chars_to = col_to->getChars(); - const auto length = IPV6_BINARY_LENGTH; - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) - { - Impl::apply( - reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); - } - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); - } -}; - - DECLARE_MULTITARGET_CODE( template @@ -1817,15 +1574,7 @@ using FunctionSipHash64Keyed = FunctionAnyHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL -using FunctionMD4 = FunctionStringHashFixedString; using FunctionHalfMD5 = FunctionAnyHash; -using FunctionMD5 = FunctionStringHashFixedString; -using FunctionSHA1 = FunctionStringHashFixedString; -using FunctionSHA224 = FunctionStringHashFixedString; -using FunctionSHA256 = FunctionStringHashFixedString; -using FunctionSHA384 = FunctionStringHashFixedString; -using FunctionSHA512 = FunctionStringHashFixedString; -using FunctionSHA512_256 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; using FunctionSipHash128Keyed = FunctionAnyHash; @@ -1854,7 +1603,6 @@ using FunctionXxHash64 = FunctionAnyHash; using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; -using FunctionBLAKE3 = FunctionStringHashFixedString; } #ifdef __clang__ diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index f56568b2508..38f16af0e6d 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -46,19 +46,34 @@ 
REGISTER_FUNCTION(Hashing) factory.registerFunction(); +#if USE_SSL + factory.registerFunction(FunctionDocumentation{ + .description = R"( +[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input +parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the +resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is +relatively slow (5 million short strings per second per processor core). - factory.registerFunction( - FunctionDocumentation{ - .description=R"( -Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. -This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. -The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. -It returns a BLAKE3 hash as a byte array with type FixedString(32). -)", - .examples{ - {"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, - .categories{"Hash"} - }, - FunctionFactory::CaseSensitive); +Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead. + )", + .syntax = "SELECT halfMD5(par1,par2,...,parN);", + .arguments + = {{"par1,par2,...,parN", + R"( +The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated +value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed +Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data). + )"}}, + .returned_value = "The computed half MD5 hash of the given input params returned as a " + "[UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.", + .examples + = {{"", + "SELECT HEX(halfMD5('abc', 'cde', 'fgh'));", + R"( +┌─hex(halfMD5('abc', 'cde', 'fgh'))─┐ +│ 2C9506B7374CFAF4 │ +└───────────────────────────────────┘ + )"}}}); +#endif } } diff --git a/src/Functions/FunctionsHashingSSL.cpp b/src/Functions/FunctionsHashingSSL.cpp deleted file mode 100644 index 3e109b8a11d..00000000000 --- a/src/Functions/FunctionsHashingSSL.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "config.h" - -#if USE_SSL - -#include "FunctionsHashing.h" -#include - -/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp -/// to better parallelize the build procedure and avoid MSan build failure -/// due to excessive resource consumption. 
- -namespace DB -{ - -REGISTER_FUNCTION(HashingSSL) -{ - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the MD4 hash of the given string.)", - .syntax = "SELECT MD4(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(MD4('abc'));", - R"( -┌─hex(MD4('abc'))──────────────────┐ -│ A448017AAF21D8525FC10AE87AA6729D │ -└──────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"( -[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input -parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the -resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is -relatively slow (5 million short strings per second per processor core). - -Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead. - )", - .syntax = "SELECT halfMD5(par1,par2,...,parN);", - .arguments = {{"par1,par2,...,parN", - R"( -The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated -value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed -Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data). 
- )" - }}, - .returned_value - = "The computed half MD5 hash of the given input params returned as a [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.", - .examples - = {{"", - "SELECT HEX(halfMD5('abc', 'cde', 'fgh'));", - R"( -┌─hex(halfMD5('abc', 'cde', 'fgh'))─┐ -│ 2C9506B7374CFAF4 │ -└───────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the MD5 hash of the given string.)", - .syntax = "SELECT MD5(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(MD5('abc'));", - R"( -┌─hex(MD5('abc'))──────────────────┐ -│ 900150983CD24FB0D6963F7D28E17F72 │ -└──────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA1 hash of the given string.)", - .syntax = "SELECT SHA1(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA1('abc'));", - R"( -┌─hex(SHA1('abc'))─────────────────────────┐ -│ A9993E364706816ABA3E25717850C26C9CD0D89D │ -└──────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA224 hash of the given string.)", - .syntax = "SELECT SHA224(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA224('abc'));", - R"( -┌─hex(SHA224('abc'))───────────────────────────────────────┐ -│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │ -└──────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA256 hash of the given string.)", - .syntax = "SELECT SHA256(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA256('abc'));", - R"( -┌─hex(SHA256('abc'))───────────────────────────────────────────────┐ -│ BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │ -└──────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA384 hash of the given string.)", - .syntax = "SELECT SHA384(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA384('abc'));", - R"( -┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐ -│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │ 
-└──────────────────────────────────────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA512 hash of the given string.)", - .syntax = "SELECT SHA512(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA512('abc'));", - R"( -┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │ -└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ - )" - }} - }); - factory.registerFunction(FunctionDocumentation{ - .description = R"(Calculates the SHA512_256 hash of the given string.)", - .syntax = "SELECT SHA512_256(s);", - .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, - .returned_value - = "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", - .examples - = {{"", - "SELECT HEX(SHA512_256('abc'));", - R"( -┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐ -│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │ -└──────────────────────────────────────────────────────────────────┘ - )" - }} - }); -} -} - -#endif diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp new file mode 100644 index 00000000000..f7134953d52 --- /dev/null +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -0,0 +1,441 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#if USE_BLAKE3 +# include +#endif + +#if USE_SSL +# include +# include +# include +# if USE_BORINGSSL +# include +# else +# include +# endif +#endif + +/// Instatiating only the functions that require FunctionStringHashFixedString in a separate file +/// to better parallelize the build procedure and avoid MSan build failure +/// due to excessive resource consumption. 
+ +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_COLUMN; +} + + +#if USE_SSL + +struct MD4Impl +{ + static constexpr auto name = "MD4"; + enum + { + length = MD4_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MD4_CTX ctx; + MD4_Init(&ctx); + MD4_Update(&ctx, reinterpret_cast(begin), size); + MD4_Final(out_char_data, &ctx); + } +}; + +struct MD5Impl +{ + static constexpr auto name = "MD5"; + enum + { + length = MD5_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MD5_CTX ctx; + MD5_Init(&ctx); + MD5_Update(&ctx, reinterpret_cast(begin), size); + MD5_Final(out_char_data, &ctx); + } +}; + +struct SHA1Impl +{ + static constexpr auto name = "SHA1"; + enum + { + length = SHA_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA_CTX ctx; + SHA1_Init(&ctx); + SHA1_Update(&ctx, reinterpret_cast(begin), size); + SHA1_Final(out_char_data, &ctx); + } +}; + +struct SHA224Impl +{ + static constexpr auto name = "SHA224"; + enum + { + length = SHA224_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA224_Init(&ctx); + SHA224_Update(&ctx, reinterpret_cast(begin), size); + SHA224_Final(out_char_data, &ctx); + } +}; + +struct SHA256Impl +{ + static constexpr auto name = "SHA256"; + enum + { + length = SHA256_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA256_Init(&ctx); + SHA256_Update(&ctx, reinterpret_cast(begin), size); + SHA256_Final(out_char_data, &ctx); + } +}; + +struct SHA384Impl +{ + static constexpr auto name = "SHA384"; + enum + { + length = SHA384_DIGEST_LENGTH + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA512_CTX ctx; + SHA384_Init(&ctx); + SHA384_Update(&ctx, reinterpret_cast(begin), size); + SHA384_Final(out_char_data, &ctx); + } +}; + +struct SHA512Impl +{ + static constexpr auto name = "SHA512"; + enum + { + length = 64 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA512_CTX ctx; + SHA512_Init(&ctx); + SHA512_Update(&ctx, reinterpret_cast(begin), size); + SHA512_Final(out_char_data, &ctx); + } +}; + +struct SHA512Impl256 +{ + static constexpr auto name = "SHA512_256"; + enum + { + length = 32 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default + /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init, + /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available + /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. 
+ auto md_ctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); + EVP_DigestUpdate(md_ctx, begin, size); + EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); + EVP_MD_CTX_destroy(md_ctx); + } +}; +#endif + +#if USE_BLAKE3 +struct ImplBLAKE3 +{ + static constexpr auto name = "BLAKE3"; + enum + { + length = 32 + }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); + auto & result = *reinterpret_cast *>(out_char_data); + + llvm::BLAKE3 hasher; + if (size > 0) + hasher.update(llvm::StringRef(begin, size)); + hasher.final(result); + } +}; + +#endif + +template +class FunctionStringHashFixedString : public IFunction +{ +public: + static constexpr auto name = Impl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); + + return std::make_shared(Impl::length); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + + const typename ColumnString::Chars & data = col_from->getChars(); + const typename ColumnString::Offsets & offsets = col_from->getOffsets(); + auto & chars_to = col_to->getChars(); + const auto size = offsets.size(); + chars_to.resize(size * Impl::length); + + ColumnString::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset - 1, + reinterpret_cast(&chars_to[i * Impl::length])); + + current_offset = offsets[i]; + } + + return col_to; + } + else if (const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); + const auto size = col_from_fix->size(); + auto & chars_to = col_to->getChars(); + const auto length = col_from_fix->getN(); + chars_to.resize(size * Impl::length); + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); + } + return col_to; + } + else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) + { + auto col_to = ColumnFixedString::create(Impl::length); + const typename ColumnIPv6::Container & data = col_from_ip->getData(); + const auto size = col_from_ip->size(); + auto & chars_to = col_to->getChars(); + const auto length = IPV6_BINARY_LENGTH; + chars_to.resize(size * Impl::length); + for (size_t i = 0; i < size; ++i) + { + Impl::apply( + reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); + } + return col_to; + } + else + throw 
Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); + } +}; + +#if USE_SSL || USE_BLAKE3 +REGISTER_FUNCTION(HashFixedStrings) +{ +# if USE_SSL + using FunctionMD4 = FunctionStringHashFixedString; + using FunctionMD5 = FunctionStringHashFixedString; + using FunctionSHA1 = FunctionStringHashFixedString; + using FunctionSHA224 = FunctionStringHashFixedString; + using FunctionSHA256 = FunctionStringHashFixedString; + using FunctionSHA384 = FunctionStringHashFixedString; + using FunctionSHA512 = FunctionStringHashFixedString; + using FunctionSHA512_256 = FunctionStringHashFixedString; + + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the MD4 hash of the given string.)", + .syntax = "SELECT MD4(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(MD4('abc'));", + R"( +┌─hex(MD4('abc'))──────────────────┐ +│ A448017AAF21D8525FC10AE87AA6729D │ +└──────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the MD5 hash of the given string.)", + .syntax = "SELECT MD5(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(MD5('abc'));", + R"( +┌─hex(MD5('abc'))──────────────────┐ +│ 900150983CD24FB0D6963F7D28E17F72 │ +└──────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA1 hash of the given string.)", + .syntax = "SELECT SHA1(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA1('abc'));", + R"( +┌─hex(SHA1('abc'))─────────────────────────┐ +│ A9993E364706816ABA3E25717850C26C9CD0D89D │ +└──────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA224 hash of the given string.)", + .syntax = "SELECT SHA224(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA224('abc'));", + R"( +┌─hex(SHA224('abc'))───────────────────────────────────────┐ +│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │ +└──────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA256 hash of the given string.)", + .syntax = "SELECT SHA256(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA256('abc'));", + R"( +┌─hex(SHA256('abc'))───────────────────────────────────────────────┐ +│ 
BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │ +└──────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA384 hash of the given string.)", + .syntax = "SELECT SHA384(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA384('abc'));", + R"( +┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐ +│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA512 hash of the given string.)", + .syntax = "SELECT SHA512(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA512('abc'));", + R"( +┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + )"}}}); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Calculates the SHA512_256 hash of the given string.)", + .syntax = "SELECT SHA512_256(s);", + .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, + .returned_value + = "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", + .examples + = {{"", + "SELECT HEX(SHA512_256('abc'));", + R"( +┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐ +│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │ +└──────────────────────────────────────────────────────────────────┘ + )"}}}); + + +# endif + +# if USE_BLAKE3 + using FunctionBLAKE3 = FunctionStringHashFixedString; + factory.registerFunction( + FunctionDocumentation{ + .description = R"( + Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. + This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. + The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. + It returns a BLAKE3 hash as a byte array with type FixedString(32). 
+ )", + .examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, + .categories{"Hash"}}, + FunctionFactory::CaseSensitive); +# endif + +#endif +} + +} From 140f6dafd8c01b2fed08b58abdfbab400982a071 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 18 Dec 2023 19:16:50 +0100 Subject: [PATCH 089/137] hung_check check with lighter queries --- tests/clickhouse-test | 140 +++++++++++++++++++++++++++++------------- 1 file changed, 97 insertions(+), 43 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1609d8f3c07..c1bc8c9e559 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -32,7 +32,7 @@ from typing import Tuple, Union, Optional, Dict, Set, List import subprocess from subprocess import Popen from subprocess import PIPE -from datetime import datetime +from datetime import datetime, timedelta from time import time, sleep from errno import ESRCH @@ -278,37 +278,44 @@ def need_retry(args, stdout, stderr, total_time): msg in stderr for msg in MESSAGES_TO_RETRY ) +def get_processlist_size(args): + if args.replicated_database: + return int( + clickhouse_execute( + args, + """ + SELECT + count() + FROM + FROM system.processes + WHERE query NOT LIKE '%system.processes%' + )) + FORMAT Vertical + """, + ).strip() + ) + else: + return int( + clickhouse_execute( + args, + """ + SELECT + count() + FROM system.processes + WHERE query NOT LIKE '%system.processes%' + FORMAT Vertical + """, + ).strip() + ) def get_processlist_with_stacktraces(args): - try: - if args.replicated_database: - return clickhouse_execute( - args, - """ - SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * - -- NOTE: view() here to do JOIN on shards, instead of initiator - FROM clusterAllReplicas('test_cluster_database_replicated', view( - SELECT - p.*, - arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( - x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), - s.trace), '\n') AS stacktrace - )) AS stacktraces - FROM system.processes p - JOIN system.stack_trace s USING (query_id) - WHERE query NOT LIKE '%system.processes%' - GROUP BY p.* - )) - ORDER BY elapsed DESC FORMAT Vertical - """, - settings={ - "allow_introspection_functions": 1, - }, - ) - else: - return clickhouse_execute( - args, - """ + if args.replicated_database: + return clickhouse_execute( + args, + """ + SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * + -- NOTE: view() here to do JOIN on shards, instead of initiator + FROM clusterAllReplicas('test_cluster_database_replicated', view( SELECT p.*, arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( @@ -319,14 +326,36 @@ def get_processlist_with_stacktraces(args): JOIN system.stack_trace s USING (query_id) WHERE query NOT LIKE '%system.processes%' GROUP BY p.* - ORDER BY elapsed DESC FORMAT Vertical - """, - settings={ - "allow_introspection_functions": 1, - }, - ) - except Exception as e: - return "Failed to get processlist: " + str(e) + )) + ORDER BY elapsed DESC FORMAT Vertical + """, + settings={ + "allow_introspection_functions": 1, + }, + timeout=120, + ) + else: + return clickhouse_execute( + args, + """ + SELECT + p.*, + arrayStringConcat(groupArray('Thread ID ' || toString(s.thread_id) || '\n' || arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces + FROM system.processes p + JOIN system.stack_trace 
s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + ORDER BY elapsed DESC FORMAT Vertical + """, + settings={ + "allow_introspection_functions": 1, + }, + timeout=120, + ) + def get_transactions_list(args): @@ -2420,11 +2449,36 @@ def main(args): if args.hung_check: # Some queries may execute in background for some time after test was finished. This is normal. - for _ in range(1, 60): - processlist = get_processlist_with_stacktraces(args) - if not processlist: - break - sleep(1) + print("Checking the hung queries: ", end='') + hung_count = 0 + try: + deadline = datetime.now() + timedelta(seconds=90) + while datetime.now() < deadline: + hung_count = get_processlist_size(args) + if hung_count == 0: + print(" done") + break + print(". ", end='') + except Exception as e: + print( + colored( + "\nHung check failed. Failed to get processlist size: " + str(e), args, "red", attrs=["bold"] + ) + ) + exit_code.value = 1 + + + processlist = "" + if hung_count > 0: + try: + processlist = get_processlist_with_stacktraces(args) + except Exception as e: + print( + colored( + "\nHung check failed, Failed to get processlist with stacktraces: " + str(e), args, "red", attrs=["bold"] + ) + ) + exit_code.value = 1 if processlist: print( From 939d602c3c83df7019d63fe4899b46a364dc26d5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 18 Dec 2023 19:26:33 +0100 Subject: [PATCH 090/137] fix typo --- docker/test/stateless/utils.lib | 2 -- tests/clickhouse-test | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index a30e05b46ff..9b6ab535a90 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -49,5 +49,3 @@ function timeout_with_logging() { } # vi: ft=bash - -} diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c1bc8c9e559..1ba67a3b2f4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -288,8 +288,7 @@ def get_processlist_size(args): count() FROM FROM system.processes - WHERE query NOT LIKE '%system.processes%' - )) + WHERE query NOT LIKE '%system.processes%' FORMAT Vertical """, ).strip() @@ -2475,7 +2474,7 @@ def main(args): except Exception as e: print( colored( - "\nHung check failed, Failed to get processlist with stacktraces: " + str(e), args, "red", attrs=["bold"] + "\nHung check failed. Failed to get processlist with stacktraces: " + str(e), args, "red", attrs=["bold"] ) ) exit_code.value = 1 From de7a349f0d0918e9352c5e7145d63588949b6820 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 18 Dec 2023 20:44:50 +0100 Subject: [PATCH 091/137] Update MergeTreeData.cpp --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d7b444f7763..da0dc6625c2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4065,9 +4065,12 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT auto is_appropriate_state = [] (const DataPartPtr & part_) { - if (part_->getState() != DataPartState::Outdated) + /// In rare cases, we may have a chain of unexpected parts that cover common source parts, e.g. 
all_1_2_3, all_1_3_4 + /// It may happen as a result of interrupted cloneReplica + bool already_active = part_->getState() == DataPartState::Active; + if (!already_active && part_->getState() != DataPartState::Outdated) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part {} from unexpected state: {}", part_->name, part_->getState()); - return true; + return !already_active; }; auto activate_part = [this, &restored_active_part](auto it) From 4109304b2a629cfe3a5ff6b345914ebf72e652e0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 18 Dec 2023 19:50:01 +0000 Subject: [PATCH 092/137] Test parallel replicas with force_primary_key setting --- .../02898_parallel_replicas_progress_bar.sql | 8 +-- ...allel_replicas_force_primary_key.reference | 6 +++ ...46_parallel_replicas_force_primary_key.sql | 49 +++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference create mode 100644 tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index 6b2f146efd0..d8bfec12b3a 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -2,9 +2,9 @@ DROP TABLE IF EXISTS t1 SYNC; DROP TABLE IF EXISTS t2 SYNC; DROP TABLE IF EXISTS t3 SYNC; -CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; -CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; -CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; +CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; +CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; +CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/02898_parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; insert into t1 select number, toString(number) from numbers(1000, 1000); insert into t2 select number, toString(number) from numbers(2000, 1000); @@ -14,7 +14,7 @@ system sync replica t1; system sync replica t2; system sync replica t3; -SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; -- default coordinator SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b'; diff --git a/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference new file mode 100644 index 00000000000..64dfee7b7a1 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.reference @@ -0,0 +1,6 @@ +1 750 +2 750 +3 750 +1 750 +2 750 +3 750 diff --git a/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql 
b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql new file mode 100644 index 00000000000..d33c8cdbc93 --- /dev/null +++ b/tests/queries/0_stateless/02946_parallel_replicas_force_primary_key.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS t1 SYNC; +DROP TABLE IF EXISTS t2 SYNC; +DROP TABLE IF EXISTS t3 SYNC; + +CREATE TABLE t1(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r1') ORDER BY k; +CREATE TABLE t2(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r2') ORDER BY k; +CREATE TABLE t3(k UInt32, v String) ENGINE ReplicatedMergeTree('/02946_parallel_replicas/{database}/test_tbl', 'r3') ORDER BY k; + +insert into t1 select number % 4, toString(number) from numbers(1000, 1000); +insert into t2 select number % 4, toString(number) from numbers(2000, 1000); +insert into t3 select number % 4, toString(number) from numbers(3000, 1000); + +system sync replica t1; +system sync replica t2; +system sync replica t3; + +-- w/o parallel replicas +SELECT + k, + count() +FROM t1 +WHERE k > 0 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1, allow_experimental_parallel_reading_from_replicas = 0; + +-- parallel replicas, primary key is used +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SELECT + k, + count() +FROM t1 +WHERE k > 0 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1; + +-- parallel replicas, primary key is NOT used +SELECT + k, + count() +FROM t1 +GROUP BY k +ORDER BY k +SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED } + +DROP TABLE t1 SYNC; +DROP TABLE t2 SYNC; +DROP TABLE t3 SYNC; From a8ef051bb2e1837fd301d2db0c6dcc6c8f1b85d6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 18 Dec 2023 20:59:22 +0100 Subject: [PATCH 093/137] Update 00002_log_and_exception_messages_formatting.sql --- .../0_stateless/00002_log_and_exception_messages_formatting.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 062806baae9..45ce28929e0 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -10,7 +10,7 @@ create view logs as select * from system.text_log where now() - toIntervalMinute -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs - where message not like '% Received from %clickhouse-staging.com:9440%'; + where message not like '% Received from %clickhouse-staging.com:9440%' and source_file not like '%/AWSLogger.cpp%'; -- Check the same for exceptions. 
The value was 0.03 select 'runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs From 27d632f5e4bd5c0e63e1e6d4db99b9f3e7b010c0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 18 Dec 2023 20:02:20 +0000 Subject: [PATCH 094/137] fix race in operations with external temporary data in cache --- src/Interpreters/Cache/Metadata.cpp | 6 ++++++ src/Interpreters/TemporaryDataOnDisk.cpp | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 4441fe20aaa..6e3a97fb8d1 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -352,7 +352,10 @@ CacheMetadata::removeEmptyKey( try { if (fs::exists(key_directory)) + { fs::remove_all(key_directory); + LOG_TEST(log, "Directory ({}) for key {} removed", key_directory.string(), key); + } } catch (...) { @@ -365,7 +368,10 @@ CacheMetadata::removeEmptyKey( { std::unique_lock mutex(key_prefix_directory_mutex); if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory)) + { fs::remove(key_prefix_directory); + LOG_TEST(log, "Prefix directory ({}) for key {} removed", key_prefix_directory.string(), key); + } } catch (...) { diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 161535afa68..96aa642295c 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -106,7 +106,10 @@ FileSegmentsHolderPtr TemporaryDataOnDisk::createCacheFile(size_t max_file_size) const auto key = FileSegment::Key::random(); auto holder = file_cache->set(key, 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true)); - fs::create_directories(file_cache->getPathInLocalCache(key)); + + chassert(holder->size() == 1); + holder->back().getKeyMetadata()->createBaseDirectory(); + return holder; } From 8811868287dc8a5399df87d7a1f61c4f91adf923 Mon Sep 17 00:00:00 2001 From: AN Date: Mon, 18 Dec 2023 18:31:44 +0300 Subject: [PATCH 095/137] Update criteo.md [ru] language fixes --- docs/ru/getting-started/example-datasets/criteo.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ru/getting-started/example-datasets/criteo.md b/docs/ru/getting-started/example-datasets/criteo.md index 5ba55795632..4818e9e69d4 100644 --- a/docs/ru/getting-started/example-datasets/criteo.md +++ b/docs/ru/getting-started/example-datasets/criteo.md @@ -1,14 +1,14 @@ --- slug: /ru/getting-started/example-datasets/criteo sidebar_position: 18 -sidebar_label: "Терабайт логов кликов от Criteo" +sidebar_label: "Терабайтный журнал посещений сайта от Criteo" --- -# Терабайт логов кликов от Criteo {#terabait-logov-klikov-ot-criteo} +# Терабайтный журнал посещений сайта от Criteo {#terabaitnyi-zhurnal} Скачайте данные с http://labs.criteo.com/downloads/download-terabyte-click-logs/ -Создайте таблицу для импорта лога: +Создайте таблицу для импорта журнала: ``` sql CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 
String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log @@ -69,7 +69,7 @@ CREATE TABLE criteo ) ENGINE = MergeTree(date, intHash32(icat1), (date, intHash32(icat1)), 8192) ``` -Преобразуем данные из сырого лога и положим во вторую таблицу: +Преобразуйте импортированные данные, разложив их по таблице сконвертированных данных: ``` sql INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log; From ddf5da225d8ce648aba4af50bc92728753bbf48b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 18 Dec 2023 21:39:59 +0000 Subject: [PATCH 096/137] Fix integration test --- .../configs/timeouts_for_fetches.xml | 1 + .../test_replicated_fetches_timeouts/test.py | 30 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) create mode 100644 tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml diff --git a/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml b/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml new file mode 100644 index 00000000000..b163c6f54a1 --- /dev/null +++ b/tests/integration/test_replicated_fetches_timeouts/configs/timeouts_for_fetches.xml @@ -0,0 +1 @@ + diff --git a/tests/integration/test_replicated_fetches_timeouts/test.py b/tests/integration/test_replicated_fetches_timeouts/test.py index 7d5da55549c..3505cbf1304 100644 --- a/tests/integration/test_replicated_fetches_timeouts/test.py +++ b/tests/integration/test_replicated_fetches_timeouts/test.py @@ -10,13 +10,20 @@ from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", with_zookeeper=True, main_configs=["configs/server.xml"] + "node1", with_zookeeper=True, main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"] ) node2 = cluster.add_instance( - "node2", with_zookeeper=True, main_configs=["configs/server.xml"] + "node2", with_zookeeper=True, stay_alive=True, main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"] ) +config = """ + + 30 + 1 + +""" + @pytest.fixture(scope="module") def started_cluster(): @@ -49,14 +56,10 @@ def test_no_stall(started_cluster): node2.query("SYSTEM STOP FETCHES t") node1.query( - "INSERT INTO t SELECT 1, '{}' FROM numbers(500)".format( - 
get_random_string(104857) - ) + f"INSERT INTO t SELECT 1, '{get_random_string(104857)}' FROM numbers(500)" ) node1.query( - "INSERT INTO t SELECT 2, '{}' FROM numbers(500)".format( - get_random_string(104857) - ) + f"INSERT INTO t SELECT 2, '{get_random_string(104857)}' FROM numbers(500)" ) with PartitionManager() as pm: @@ -82,14 +85,13 @@ def test_no_stall(started_cluster): print("Connection timeouts tested!") - # Increase connection timeout and wait for receive timeouts. - node2.query( - """ - ALTER TABLE t - MODIFY SETTING replicated_fetches_http_connection_timeout = 30, - replicated_fetches_http_receive_timeout = 1""" + node2.replace_config( + "/etc/clickhouse-server/config.d/timeouts_for_fetches.xml", + config ) + node2.restart_clickhouse() + while True: timeout_exceptions = int( node2.query( From 2be806618e28ce41312d829213e3ecc8bcf34597 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 Dec 2023 22:07:00 +0000 Subject: [PATCH 097/137] Automatic style fix --- .../test_replicated_fetches_timeouts/test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_replicated_fetches_timeouts/test.py b/tests/integration/test_replicated_fetches_timeouts/test.py index 3505cbf1304..55fa4b909ba 100644 --- a/tests/integration/test_replicated_fetches_timeouts/test.py +++ b/tests/integration/test_replicated_fetches_timeouts/test.py @@ -10,11 +10,16 @@ from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", with_zookeeper=True, main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"] + "node1", + with_zookeeper=True, + main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"], ) node2 = cluster.add_instance( - "node2", with_zookeeper=True, stay_alive=True, main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"] + "node2", + with_zookeeper=True, + stay_alive=True, + main_configs=["configs/server.xml", "configs/timeouts_for_fetches.xml"], ) config = """ @@ -86,8 +91,7 @@ def test_no_stall(started_cluster): print("Connection timeouts tested!") node2.replace_config( - "/etc/clickhouse-server/config.d/timeouts_for_fetches.xml", - config + "/etc/clickhouse-server/config.d/timeouts_for_fetches.xml", config ) node2.restart_clickhouse() From fd460566f64e59ece249414e2e35b5c46e3cc4a2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Mon, 18 Dec 2023 19:34:08 -0500 Subject: [PATCH 098/137] fix setting description --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index d9b996b36ca..07051a695de 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -73,7 +73,7 @@ struct Settings; M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. 
Disabled if set to 0", 0) \ M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ - M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "If true, estimated size (excluding lightweight deleted rows) will be used as source part size when selecting parts to merge", 0) \ + M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ From 0fc402c1068643d2689df2a2e8fe50bcff418a39 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 18 Dec 2023 22:48:48 +0000 Subject: [PATCH 099/137] Fix segfault in FuzzJSON engine Allow only String type for FuzzJSON engine table columns. Fixes: https://github.com/ClickHouse/ClickHouse/issues/57858 --- src/Storages/StorageFuzzJSON.cpp | 5 +++++ tests/queries/0_stateless/02919_storage_fuzzjson.sql | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 6bf69efa1dd..631775f7493 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -719,6 +719,11 @@ void registerStorageFuzzJSON(StorageFactory & factory) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzJSON must have arguments."); StorageFuzzJSON::Configuration configuration = StorageFuzzJSON::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzJSON' supports only columns of String type, got {}.", col.type->getName()); + return std::make_shared(args.table_id, args.columns, args.comment, configuration); }); } diff --git a/tests/queries/0_stateless/02919_storage_fuzzjson.sql b/tests/queries/0_stateless/02919_storage_fuzzjson.sql index 80b4a406a08..1a85748b061 100644 --- a/tests/queries/0_stateless/02919_storage_fuzzjson.sql +++ b/tests/queries/0_stateless/02919_storage_fuzzjson.sql @@ -42,3 +42,15 @@ CREATE TABLE 02919_test_table_reuse_args(str String) ENGINE = FuzzJSON( SELECT count() FROM (SELECT * FROM 02919_test_table_reuse_args LIMIT 100); DROP TABLE IF EXISTS 02919_test_table_reuse_args; + +-- +DROP TABLE IF EXISTS 02919_test_table_invalid_col_type; +CREATE TABLE 02919_test_table_invalid_col_type +( + str Nullable(Int64) +) +ENGINE = FuzzJSON('{"pet":"rat"}', NULL); -- { serverError BAD_ARGUMENTS } + +DROP TABLE IF EXISTS 02919_test_table_invalid_col_type; + +-- From 6014dca114a03130499fc66f71b614ab06c623cb Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Tue, 19 Dec 2023 02:33:44 +0000 Subject: [PATCH 100/137] Allow multiple columns in StorageFuzzJSON --- src/Storages/StorageFuzzJSON.cpp | 6 +++++- .../queries/0_stateless/02919_storage_fuzzjson.reference | 1 + tests/queries/0_stateless/02919_storage_fuzzjson.sql | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 631775f7493..4d94fe3cdb0 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ 
b/src/Storages/StorageFuzzJSON.cpp @@ -481,7 +481,11 @@ protected: { Columns columns; columns.reserve(block_header.columns()); - columns.emplace_back(createColumn()); + for (const auto& col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } return {std::move(columns), block_size}; } diff --git a/tests/queries/0_stateless/02919_storage_fuzzjson.reference b/tests/queries/0_stateless/02919_storage_fuzzjson.reference index a134ce52c11..8f4ee4a5615 100644 --- a/tests/queries/0_stateless/02919_storage_fuzzjson.reference +++ b/tests/queries/0_stateless/02919_storage_fuzzjson.reference @@ -1,3 +1,4 @@ 100 100 100 +100 100 diff --git a/tests/queries/0_stateless/02919_storage_fuzzjson.sql b/tests/queries/0_stateless/02919_storage_fuzzjson.sql index 1a85748b061..bf473f4b6b8 100644 --- a/tests/queries/0_stateless/02919_storage_fuzzjson.sql +++ b/tests/queries/0_stateless/02919_storage_fuzzjson.sql @@ -54,3 +54,12 @@ ENGINE = FuzzJSON('{"pet":"rat"}', NULL); -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS 02919_test_table_invalid_col_type; -- +DROP TABLE IF EXISTS 02919_test_multi_col; +CREATE TABLE 02919_test_multi_col +( + str1 String, + str2 String +) ENGINE = FuzzJSON('{"pet":"rat"}', 999); + +SELECT count(str1), count(str2) FROM (SELECT str1, str2 FROM 02919_test_multi_col LIMIT 100); +DROP TABLE IF EXISTS 02919_test_multi_col; From 31f04b66c5cb468ef9fb532c5101e771e8b75f6b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Tue, 19 Dec 2023 03:47:22 +0000 Subject: [PATCH 101/137] fix freebsd build https://github.com/ClickHouse/ClickHouse/actions/runs/7256640256/job/19769624432?pr=58015#step:8:5466 ``` Dec 19 03:09:41 /build/src/IO/AIO.cpp:140:15: error: use of undeclared identifier 'ErrnoException'; did you mean 'DB::ErrnoException'? Dec 19 03:09:41 140 | throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); ``` --- src/IO/AIO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index abad8a0727d..7a051950f52 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -137,7 +137,7 @@ AIOContext::AIOContext(unsigned int) { ctx = io_setup(); if (ctx < 0) - throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); } AIOContext::~AIOContext() From 679a0e1300a72bcb770d7f8831906f4e797a336f Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Thu, 14 Dec 2023 10:05:01 +0200 Subject: [PATCH 102/137] StorageS3 / TableFunctionS3: Allow passing session_token to AuthSettings This can help users that want to pass temporary credentials that issued by AWS in order to load data from S3 without changing configuration or creating an IAM User. 
Fixes #57848 --- docs/en/sql-reference/table-functions/s3.md | 4 +- .../table-functions/s3Cluster.md | 5 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- .../table-functions/s3Cluster.md | 4 +- docs/zh/sql-reference/table-functions/s3.md | 2 +- src/Storages/StorageS3.cpp | 42 +++++++++++-- src/TableFunctions/TableFunctionS3.cpp | 62 ++++++++++++++++--- src/TableFunctions/TableFunctionS3.h | 6 +- src/TableFunctions/TableFunctionS3Cluster.h | 1 + 9 files changed, 107 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index dc11259c626..61a9187575d 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` :::tip GCS @@ -38,6 +38,8 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ ::: - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 799eb31446a..080c9860519 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -10,14 +10,15 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google **Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. 
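A short usage sketch of the extended signature documented above (the bucket URL, key pair, and token below are placeholder values, not taken from this patch): the session token is passed as one more literal right after the key pair, before the optional format.

``` sql
-- Read with temporary STS-style credentials:
-- url, access_key_id, secret_access_key, session_token, format
SELECT count()
FROM s3(
    'https://example-bucket.s3.amazonaws.com/data/*.parquet',
    'EXAMPLE_ACCESS_KEY_ID',
    'exampleSecretAccessKey',
    'exampleSessionToken',
    'Parquet'
);
```

As with the keys themselves, the token is optional and only meaningful when explicit credentials are given; in the `NOSIGN` form no token can be supplied.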
diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 7deef68f47f..fe40cb0c507 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **Синтаксис** ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` **Aргументы** diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index b8f34d805ff..b382bf5e384 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -11,14 +11,14 @@ sidebar_label: s3Cluster **Синтаксис** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key [,session_token]] [,format] [,structure]) ``` **Аргументы** - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. - `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. +- `access_key_id`, `secret_access_key` и `session_token` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. 
diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index a62fa9ebb19..f7384a7526e 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **语法** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] ,format, structure, [compression]) ``` **参数** diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e8f460525db..096e2e88f91 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -104,6 +104,7 @@ static const std::unordered_set optional_configuration_keys = "structure", "access_key_id", "secret_access_key", + "session_token", "filename", "use_environment_credentials", "max_single_read_retries", @@ -1521,11 +1522,14 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// S3('url', NOSIGN, 'format') /// S3('url', NOSIGN, 'format', 'compression') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') /// with optional headers() function - if (engine_args.empty() || engine_args.size() > 5) + if (engine_args.empty() || engine_args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage S3 requires 1 to 5 arguments: " "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); @@ -1541,7 +1545,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context static std::unordered_map> size_to_engine_args { {1, {{}}}, - {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}} + {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}} }; std::unordered_map engine_args_to_idx; @@ -1577,7 +1581,8 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 2 possible variants: + /// For 4 arguments we support 3 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token) /// - s3(source, access_key_id, secret_access_key, format) /// - s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
@@ -1590,7 +1595,32 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; } else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } } else { @@ -1612,6 +1642,10 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context if (engine_args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + if (engine_args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); + + configuration.auth_settings.no_sign_request = no_sign_request; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e6ae75a5fd5..c52256fb984 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -71,7 +71,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (header_it != args.end()) args.erase(header_it); - if (args.empty() || args.size() > 6) + if (args.empty() || args.size() > 7) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); for (auto & arg : args) @@ -81,7 +81,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context static std::unordered_map> size_to_args { {1, {{}}}, - {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}} + {7, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} }; std::unordered_map args_to_idx; @@ -118,11 +118,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 3 possible variants: + /// For 4 arguments we support 4 possible variants: /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, access_key_id, format) + /// - s3(source, access_key_id, access_key_id, format), + /// - s3(source, access_key_id, access_key_id, session_token) /// - s3(source, NOSIGN, format, structure) - /// We 
can distinguish them by looking at the 2-nd argument: check if it's a format name or not. + /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. else if (args.size() == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); @@ -132,14 +133,28 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}}; } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } } - /// For 5 arguments we support 2 possible variants: + /// For 5 arguments we support 3 possible variants: /// - s3(source, access_key_id, access_key_id, format, structure) + /// - s3(source, access_key_id, access_key_id, session_token, format) /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, + /// and by the 4-th argument, check if it's a format name or not else if (args.size() == 5) { auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); @@ -149,7 +164,33 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + // For 6 arguments we support 2 possible variants: + /// - s3(source, access_key_id, access_key_id, format, structure, compression_method) + /// - s3(source, access_key_id, access_key_id, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } } else { @@ -181,6 +222,9 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = 
checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); + if (args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); + configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index fc384176007..fa73c1d313e 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -22,11 +22,15 @@ public: static constexpr auto signature = " - url\n" " - url, format\n" " - url, format, structure\n" - " - url, access_key_id, secret_access_key\n" " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; static size_t getMaxNumberOfArguments() { return 6; } diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 4fe25079cf4..718b0d90de8 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -35,6 +35,7 @@ public: " - cluster, url, access_key_id, secret_access_key, format\n" " - cluster, url, access_key_id, secret_access_key, format, structure\n" " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; String getName() const override From a077ad4c15936c2adae1397b4d9b8c3fabc30277 Mon Sep 17 00:00:00 2001 From: ubuntu <872237106@qq.com> Date: Tue, 19 Dec 2023 15:18:27 +0800 Subject: [PATCH 103/137] support new analyzer --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 35 +++++++++++++++ src/Analyzer/QueryNode.h | 13 ++++++ src/Analyzer/QueryTreeBuilder.cpp | 1 + .../0_stateless/02943_order_by_all.reference | 45 +++++++++++++++++++ .../0_stateless/02943_order_by_all.sql | 35 +++++++++++++++ 5 files changed, 129 insertions(+) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 1e63d5ca8e4..4dd43984e23 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -119,6 +119,7 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; extern const int SYNTAX_ERROR; + extern const int UNEXPECTED_EXPRESSION; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. 
@@ -1209,6 +1210,8 @@ private: static void expandGroupByAll(QueryNode & query_tree_node_typed); + static void expandOrderByAll(QueryNode & query_tree_node_typed); + static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context); @@ -2312,6 +2315,35 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed) recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes); } +void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) +{ + auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as(); + if (!all_node) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node."); + + auto & projection_nodes = query_tree_node_typed.getProjection().getNodes(); + auto list_node = std::make_shared(); + list_node->getNodes().reserve(projection_nodes.size()); + + for (auto & node : projection_nodes) + { + if (auto * identifier_node = node->as(); identifier_node != nullptr) + if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + if (auto * function_node = node->as(); function_node != nullptr) + if (Poco::toUpper(function_node->getAlias()) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + + auto sort_node = std::make_shared(node, all_node->getSortDirection(), all_node->getNullsSortDirection()); + list_node->getNodes().push_back(sort_node); + } + + query_tree_node_typed.getOrderByNode() = list_node; +} + std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded( const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context) { @@ -6975,6 +7007,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING"); + if (settings.enable_order_by_all && query_node_typed.isOrderByAll()) + expandOrderByAll(query_node_typed); + /// Initialize aliases in query node scope QueryExpressionsAliasVisitor visitor(scope); diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index 82bc72b7411..d8b8741afb2 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -219,6 +219,18 @@ public: is_group_by_all = is_group_by_all_value; } + /// Returns true, if query node has ORDER BY ALL modifier, false otherwise + bool isOrderByAll() const + { + return is_order_by_all; + } + + /// Set query node ORDER BY ALL modifier value + void setIsOrderByAll(bool is_order_by_all_value) + { + is_order_by_all = is_order_by_all_value; + } + /// Returns true if query node WITH section is not empty, false otherwise bool hasWith() const { @@ -590,6 +602,7 @@ private: bool is_group_by_with_cube = false; bool is_group_by_with_grouping_sets = false; bool is_group_by_all = false; + bool is_order_by_all = false; std::string cte_name; NamesAndTypes projection_columns; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index c541888e5b9..4e2d0ad10a8 100644 --- 
a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -284,6 +284,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); current_query_tree->setIsGroupByAll(select_query_typed.group_by_all); + current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); current_query_tree->setOriginalAST(select_query); auto current_context = current_query_tree->getContext(); diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index f60c7976ae6..b4c6229ff93 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -42,3 +42,48 @@ B 3 10 D 1 20 A 2 30 C \N 40 +-- enable new analyzer +-- no modifiers +A 2 +B 3 +C \N +D 1 +1 D +2 A +3 B +\N C +-- with ASC/DESC modifiers +A 2 +B 3 +C \N +D 1 +D 1 +C \N +B 3 +A 2 +-- with NULLS FIRST/LAST modifiers +\N C +1 D +2 A +3 B +1 D +2 A +3 B +\N C +-- what happens if some column "all" already exists? +B 3 10 +D 1 20 +A 2 30 +C \N 40 +D 1 +A 2 +B 3 +C \N +A 2 +B 3 +D 1 +\N +B 3 10 +D 1 20 +A 2 30 +C \N 40 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index c1e358178d5..876365300ea 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -43,4 +43,39 @@ SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enab SELECT a, b, all FROM order_by_all ORDER BY all, a; +SELECT '-- enable new analyzer'; +set allow_experimental_analyzer = 1; + +SELECT '-- no modifiers'; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT b, a FROM order_by_all ORDER BY ALL; + +SELECT '-- with ASC/DESC modifiers'; +SELECT a, b FROM order_by_all ORDER BY ALL ASC; +SELECT a, b FROM order_by_all ORDER BY ALL DESC; + +SELECT '-- with NULLS FIRST/LAST modifiers'; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; + +SELECT '-- what happens if some column "all" already exists?'; + +-- columns +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +-- column aliases +SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +-- expressions +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SELECT a, b, all FROM order_by_all ORDER BY all, a; + DROP TABLE order_by_all; + From 1d9c0db1f6feadbfbd96a818b99927d00943ad98 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Dec 2023 08:00:59 +0000 Subject: [PATCH 104/137] Incorporate review feedback --- src/Functions/punycode.cpp | 74 ++++++-------------- 
tests/queries/0_stateless/02932_punycode.sql | 2 +- 2 files changed, 21 insertions(+), 55 deletions(-) diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index e90cba82b1f..c11409f0d1a 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -33,12 +33,10 @@ struct PunycodeEncodeImpl ColumnString::Offsets & res_offsets) { const size_t rows = offsets.size(); - res_data.resize(rows * 64); /// just a guess - res_offsets.resize(rows); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); size_t prev_offset = 0; - size_t prev_res_offset = 0; - size_t res_data_bytes_written = 0; std::u32string value_utf32; std::string value_puny; for (size_t row = 0; row < rows; ++row) @@ -46,37 +44,22 @@ struct PunycodeEncodeImpl const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[row] - prev_offset - 1; - size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length); - value_utf32.resize(value_utf32_length, '\0'); - + const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length); + value_utf32.resize(value_utf32_length); ada::idna::utf8_to_utf32(value, value_length, value_utf32.data()); - bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny); + const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny); if (!ok) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding"); - const size_t bytes_to_write = value_puny.size() + 1; - if (res_data_bytes_written + bytes_to_write > res_data.size()) - { - size_t new_size = std::max(res_data.size() * 2, res_data_bytes_written + bytes_to_write); - res_data.resize(new_size); - } - - std::memcpy(&res_data[res_data_bytes_written], value_puny.data(), value_puny.size()); - res_data_bytes_written += value_puny.size(); - - res_data[res_data_bytes_written] = '\0'; - res_data_bytes_written += 1; - - res_offsets[row] = prev_res_offset + bytes_to_write; + res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1); + res_offsets.push_back(res_data.size()); prev_offset = offsets[row]; - prev_res_offset = res_offsets[row]; - value_utf32.clear(); - value_puny.clear(); - } - res_data.resize(res_data_bytes_written); + value_utf32.clear(); + value_puny.clear(); /// utf32_to_punycode() appends to its output string + } } [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) @@ -94,12 +77,10 @@ struct PunycodeDecodeImpl ColumnString::Offsets & res_offsets) { const size_t rows = offsets.size(); - res_data.resize(rows * 64); /// just a guess - res_offsets.resize(rows); + res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII + res_offsets.reserve(rows); size_t prev_offset = 0; - size_t prev_res_offset = 0; - size_t res_data_bytes_written = 0; std::u32string value_utf32; std::string value_utf8; for (size_t row = 0; row < rows; ++row) @@ -107,38 +88,23 @@ struct PunycodeDecodeImpl const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[row] - prev_offset - 1; - std::string_view value_punycode(value, value_length); - bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32); + const std::string_view value_punycode(value, value_length); + const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32); if (!ok) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding"); - size_t utf8_length = 
ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size()); - value_utf8.resize(utf8_length, '\0'); - + const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size()); + value_utf8.resize(utf8_length); ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data()); - const size_t bytes_to_write = value_utf8.size() + 1; - if (res_data_bytes_written + bytes_to_write > res_data.size()) - { - size_t new_size = std::max(res_data.size() * 2, res_data_bytes_written + bytes_to_write); - res_data.resize(new_size); - } - - std::memcpy(&res_data[res_data_bytes_written], value_utf8.data(), value_utf8.size()); - res_data_bytes_written += value_utf8.size(); - - res_data[res_data_bytes_written] = '\0'; - res_data_bytes_written += 1; - - res_offsets[row] = prev_res_offset + bytes_to_write; + res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1); + res_offsets.push_back(res_data.size()); prev_offset = offsets[row]; - prev_res_offset = res_offsets[row]; - value_utf32.clear(); + + value_utf32.clear(); /// punycode_to_utf32() appends to its output string value_utf8.clear(); } - - res_data.resize(res_data_bytes_written); } [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) diff --git a/tests/queries/0_stateless/02932_punycode.sql b/tests/queries/0_stateless/02932_punycode.sql index fd128507a8f..dd18a43ecc9 100644 --- a/tests/queries/0_stateless/02932_punycode.sql +++ b/tests/queries/0_stateless/02932_punycode.sql @@ -14,7 +14,7 @@ SELECT punycodeEncode(toFixedString('two', 3)); -- { serverError ILLEGAL_COLUMN SELECT '-- Regular cases'; --- The test cases originate from the idna unit tests: +-- The test cases originate from the ada idna unit tests: --- https://github.com/ada-url/idna/blob/8cd03ef867dbd06be87bd61df9cf69aa1182ea21/tests/fixtures/utf8_punycode_alternating.txt SELECT 'a' AS str, punycodeEncode(str) AS puny, punycodeDecode(puny) AS original; From a8845ba729224ab10e21349a416ec5f3b5d84eab Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Dec 2023 08:13:17 +0000 Subject: [PATCH 105/137] Reorganize the test (a bit) + whitespace fixes --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 +- .../0_stateless/02943_order_by_all.reference | 69 ++++++++--------- .../0_stateless/02943_order_by_all.sql | 74 ++++++++++--------- 3 files changed, 75 insertions(+), 72 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 4dd43984e23..3290d918a8b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2330,7 +2330,7 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) if (auto * identifier_node = node->as(); identifier_node != nullptr) if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL") throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); if (auto * function_node = node->as(); function_node != nullptr) if (Poco::toUpper(function_node->getAlias()) == "ALL") @@ -7008,7 +7008,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS 
and WITH ROLLUP or CUBE are not supported together in presence of HAVING"); if (settings.enable_order_by_all && query_node_typed.isOrderByAll()) - expandOrderByAll(query_node_typed); + expandOrderByAll(query_node_typed); /// Initialize aliases in query node scope QueryExpressionsAliasVisitor visitor(scope); diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index b4c6229ff93..48d828b6924 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -7,6 +7,14 @@ D 1 2 A 3 B \N C +A 2 +B 3 +C \N +D 1 +1 D +2 A +3 B +\N C -- with ASC/DESC modifiers A 2 B 3 @@ -16,43 +24,6 @@ D 1 C \N B 3 A 2 --- with NULLS FIRST/LAST modifiers -\N C -1 D -2 A -3 B -1 D -2 A -3 B -\N C --- what happens if some column "all" already exists? -B 3 10 -D 1 20 -A 2 30 -C \N 40 -D 1 -A 2 -B 3 -C \N -A 2 -B 3 -D 1 -\N -B 3 10 -D 1 20 -A 2 30 -C \N 40 --- enable new analyzer --- no modifiers -A 2 -B 3 -C \N -D 1 -1 D -2 A -3 B -\N C --- with ASC/DESC modifiers A 2 B 3 C \N @@ -70,15 +41,35 @@ A 2 2 A 3 B \N C +\N C +1 D +2 A +3 B +1 D +2 A +3 B +\N C -- what happens if some column "all" already exists? B 3 10 D 1 20 A 2 30 C \N 40 +B 3 10 +D 1 20 +A 2 30 +C \N 40 D 1 A 2 B 3 C \N +D 1 +A 2 +B 3 +C \N +A 2 +B 3 +D 1 +\N A 2 B 3 D 1 @@ -87,3 +78,7 @@ B 3 10 D 1 20 A 2 30 C \N 40 +B 3 10 +D 1 20 +A 2 30 +C \N 40 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 876365300ea..0756563946c 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -13,69 +13,77 @@ ENGINE = Memory; INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); SELECT '-- no modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT b, a FROM order_by_all ORDER BY ALL; + +SET allow_experimental_analyzer = 1; SELECT a, b FROM order_by_all ORDER BY ALL; SELECT b, a FROM order_by_all ORDER BY ALL; SELECT '-- with ASC/DESC modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT a, b FROM order_by_all ORDER BY ALL ASC; +SELECT a, b FROM order_by_all ORDER BY ALL DESC; + +SET allow_experimental_analyzer = 1; SELECT a, b FROM order_by_all ORDER BY ALL ASC; SELECT a, b FROM order_by_all ORDER BY ALL DESC; SELECT '-- with NULLS FIRST/LAST modifiers'; + +SET allow_experimental_analyzer = 0; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; + +SET allow_experimental_analyzer = 1; SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; SELECT '-- what happens if some column "all" already exists?'; -- columns + +SET allow_experimental_analyzer = 0; +SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; -- column aliases + +SET allow_experimental_analyzer = 0; +SELECT a, b AS all FROM 
order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 1; SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; -- expressions + +SET allow_experimental_analyzer = 0; SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; +SET allow_experimental_analyzer = 1; +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } +SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; + +SET allow_experimental_analyzer = 0; SELECT a, b, all FROM order_by_all ORDER BY all, a; -SELECT '-- enable new analyzer'; -set allow_experimental_analyzer = 1; - -SELECT '-- no modifiers'; -SELECT a, b FROM order_by_all ORDER BY ALL; -SELECT b, a FROM order_by_all ORDER BY ALL; - -SELECT '-- with ASC/DESC modifiers'; -SELECT a, b FROM order_by_all ORDER BY ALL ASC; -SELECT a, b FROM order_by_all ORDER BY ALL DESC; - -SELECT '-- with NULLS FIRST/LAST modifiers'; -SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; -SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; - -SELECT '-- what happens if some column "all" already exists?'; - --- columns -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - --- column aliases -SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - --- expressions -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; - +SET allow_experimental_analyzer = 1; SELECT a, b, all FROM order_by_all ORDER BY all, a; DROP TABLE order_by_all; - From 5f38e1d94433d8589b83205d2983d007678d50d4 Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Mon, 18 Dec 2023 16:31:01 +0200 Subject: [PATCH 106/137] S3 Session Tokens: Added tests --- tests/integration/test_storage_s3/test.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 16183733656..2549cb0d473 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -626,7 +626,7 
@@ def test_wrong_s3_syntax(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance expected_err_msg = "Code: 42" # NUMBER_OF_ARGUMENTS_DOESNT_MATCH - query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '')" + query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '', '')" assert expected_err_msg in instance.query_and_get_error(query) expected_err_msg = "Code: 36" # BAD_ARGUMENTS @@ -1395,6 +1395,7 @@ def test_schema_inference_from_globs(started_cluster): def test_signatures(started_cluster): + session_token = "session token that will not be checked by MiniIO" bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -1417,6 +1418,11 @@ def test_signatures(started_cluster): ) assert int(result) == 1 + result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}')" + ) + assert int(result) == 1 + result = instance.query( f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'Arrow', 'x UInt64', 'auto')" ) @@ -1427,6 +1433,21 @@ def test_signatures(started_cluster): ) assert int(result) == 1 + result = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow')" + ) + assert int(result) == 1 + + lt = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64')" + ) + assert int(result) == 1 + + lt = instance.query( + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64', 'auto')" + ) + assert int(result) == 1 + def test_select_columns(started_cluster): bucket = started_cluster.minio_bucket From 06a2e86983fe46c203d20878f23b8e86d3f3e41c Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 19 Dec 2023 10:02:45 +0100 Subject: [PATCH 107/137] Fix test --- ...lly_change_filesystem_cache_size.reference | 2 +- ...ynamically_change_filesystem_cache_size.sh | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index cd155b6ca29..8620171cb99 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,4 +1,4 @@ -100 10 10 10 0 0 98 10 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 +100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 1 0 10 98 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index e47e13a7e40..2e344a6b6e5 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings CUR_DIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,6 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) disk_name="s3_cache_02944" +$CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" $CLICKHOUSE_CLIENT -nm --query " @@ -32,7 +33,9 @@ cat $config_path \ > $config_path_tmp mv $config_path_tmp $config_path -$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" @@ -44,7 +47,9 @@ cat $config_path \ > $config_path_tmp mv $config_path_tmp $config_path -$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" @@ -58,7 +63,9 @@ cat $config_path \ > $config_path_tmp mv $config_path_tmp $config_path -$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" @@ -70,7 +77,9 @@ cat $config_path \ > $config_path_tmp mv $config_path_tmp $config_path -$CLICKHOUSE_CLIENT -nm --query "SYSTEM RELOAD CONFIG" +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" From 52b3caec906fb35aa9a9ffdd5fbe4e83ffee43d6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 19 Dec 2023 10:36:22 +0100 Subject: [PATCH 108/137] Revert "Add system.dropped_tables_parts table" --- .../system-tables/dropped_tables_parts.md | 14 --- .../StorageSystemDroppedTablesParts.cpp | 89 ------------------- .../System/StorageSystemDroppedTablesParts.h | 33 ------- src/Storages/System/StorageSystemParts.h | 2 +- .../System/StorageSystemPartsBase.cpp | 68 +++++++++++--- src/Storages/System/StorageSystemPartsBase.h | 81 ++--------------- src/Storages/System/attachSystemTables.cpp | 2 - 7 files changed, 64 insertions(+), 225 deletions(-) delete mode 100644 docs/en/operations/system-tables/dropped_tables_parts.md delete mode 100644 src/Storages/System/StorageSystemDroppedTablesParts.cpp delete mode 100644 src/Storages/System/StorageSystemDroppedTablesParts.h diff --git a/docs/en/operations/system-tables/dropped_tables_parts.md b/docs/en/operations/system-tables/dropped_tables_parts.md deleted file mode 100644 index 095f35287fe..00000000000 --- a/docs/en/operations/system-tables/dropped_tables_parts.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -slug: /en/operations/system-tables/dropped_tables_parts ---- -# dropped_tables_parts {#system_tables-dropped_tables_parts} - -Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) dropped tables from [system.dropped_tables](./dropped_tables.md) - -The schema of this table is the same as [system.parts](./parts.md) - -**See Also** - -- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) -- [system.parts](./parts.md) -- 
[system.dropped_tables](./dropped_tables.md) diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp deleted file mode 100644 index 9253cc99d72..00000000000 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - - -StoragesDroppedInfoStream::StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - : StoragesInfoStreamBase(context) -{ - needsLock = false; - - /// Will apply WHERE to subset of columns and then add more columns. - /// This is kind of complicated, but we use WHERE to do less work. - - Block block_to_filter; - - MutableColumnPtr database_column_mut = ColumnString::create(); - MutableColumnPtr table_column_mut = ColumnString::create(); - MutableColumnPtr engine_column_mut = ColumnString::create(); - MutableColumnPtr active_column_mut = ColumnUInt8::create(); - MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); - - const auto access = context->getAccess(); - const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); - - auto tables_mark_dropped = DatabaseCatalog::instance().getTablesMarkedDropped(); - for (const auto & dropped_table : tables_mark_dropped) - { - StoragePtr storage = dropped_table.table; - if (!storage) - continue; - - UUID storage_uuid = storage->getStorageID().uuid; - String database_name = storage->getStorageID().getDatabaseName(); - String table_name = storage->getStorageID().getTableName(); - String engine_name = storage->getName(); -#if USE_MYSQL - if (auto * proxy = dynamic_cast(storage.get())) - { - auto nested = proxy->getNested(); - storage.swap(nested); - } -#endif - if (!dynamic_cast(storage.get())) - continue; - - if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) - continue; - - storages[storage_uuid] = storage; - - /// Add all combinations of flag 'active'. - for (UInt64 active : {0, 1}) - { - database_column_mut->insert(database_name); - table_column_mut->insert(table_name); - engine_column_mut->insert(engine_name); - active_column_mut->insert(active); - storage_uuid_column_mut->insert(storage_uuid); - } - } - - block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); - block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); - block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); - block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); - block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); - - if (block_to_filter.rows()) - { - /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
- VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); - rows = block_to_filter.rows(); - } - - database_column = block_to_filter.getByName("database").column; - table_column = block_to_filter.getByName("table").column; - active_column = block_to_filter.getByName("active").column; - storage_uuid_column = block_to_filter.getByName("uuid").column; -} - - -} diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h deleted file mode 100644 index 1a8a27e0b7c..00000000000 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -class StoragesDroppedInfoStream : public StoragesInfoStreamBase -{ -public: - StoragesDroppedInfoStream(const SelectQueryInfo & query_info, ContextPtr context); -}; - -class Context; - - -/** Implements system table 'dropped_tables_parts' which allows to get information about data parts for dropped but not yet removed tables. - */ -class StorageSystemDroppedTablesParts final : public StorageSystemParts -{ -public: - explicit StorageSystemDroppedTablesParts(const StorageID & table_id) : StorageSystemParts(table_id) {} - - std::string getName() const override { return "SystemDroppedTablesParts"; } -protected: - std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) override - { - return std::make_unique(query_info, context); - } -}; - -} diff --git a/src/Storages/System/StorageSystemParts.h b/src/Storages/System/StorageSystemParts.h index e0082e40e7d..c7a46cfda54 100644 --- a/src/Storages/System/StorageSystemParts.h +++ b/src/Storages/System/StorageSystemParts.h @@ -11,7 +11,7 @@ class Context; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. */ -class StorageSystemParts : public StorageSystemPartsBase +class StorageSystemParts final : public StorageSystemPartsBase { public: explicit StorageSystemParts(const StorageID & table_id_); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 7b2e9fe18a7..8d2e2900722 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -23,6 +22,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) { bool has_state_column = false; @@ -79,7 +83,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - : StoragesInfoStreamBase(context) + : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()) { /// Will apply WHERE to subset of columns and then add more columns. /// This is kind of complicated, but we use WHERE to do less work. 
@@ -89,7 +93,6 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte MutableColumnPtr table_column_mut = ColumnString::create(); MutableColumnPtr engine_column_mut = ColumnString::create(); MutableColumnPtr active_column_mut = ColumnUInt8::create(); - MutableColumnPtr storage_uuid_column_mut = ColumnUUID::create(); const auto access = context->getAccess(); const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); @@ -136,7 +139,6 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte continue; String engine_name = storage->getName(); - UUID storage_uuid = storage->getStorageID().uuid; #if USE_MYSQL if (auto * proxy = dynamic_cast(storage.get())) @@ -151,7 +153,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - storages[storage_uuid] = storage; + storages[std::make_pair(database_name, iterator->name())] = storage; /// Add all combinations of flag 'active'. for (UInt64 active : {0, 1}) @@ -159,7 +161,6 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte table_column_mut->insert(table_name); engine_column_mut->insert(engine_name); active_column_mut->insert(active); - storage_uuid_column_mut->insert(storage_uuid); } offsets[i] += 2; @@ -177,7 +178,6 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared(), "table")); block_to_filter.insert(ColumnWithTypeAndName(std::move(engine_column_mut), std::make_shared(), "engine")); block_to_filter.insert(ColumnWithTypeAndName(std::move(active_column_mut), std::make_shared(), "active")); - block_to_filter.insert(ColumnWithTypeAndName(std::move(storage_uuid_column_mut), std::make_shared(), "uuid")); if (rows) { @@ -189,9 +189,57 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, Conte database_column = block_to_filter.getByName("database").column; table_column = block_to_filter.getByName("table").column; active_column = block_to_filter.getByName("active").column; - storage_uuid_column = block_to_filter.getByName("uuid").column; + + next_row = 0; } +StoragesInfo StoragesInfoStream::next() +{ + while (next_row < rows) + { + StoragesInfo info; + + info.database = (*database_column)[next_row].get(); + info.table = (*table_column)[next_row].get(); + + auto is_same_table = [&info, this] (size_t row) -> bool + { + return (*database_column)[row].get() == info.database && + (*table_column)[row].get() == info.table; + }; + + /// We may have two rows per table which differ in 'active' value. + /// If rows with 'active = 0' were not filtered out, this means we + /// must collect the inactive parts. Remember this fact in StoragesInfo. + for (; next_row < rows && is_same_table(next_row); ++next_row) + { + const auto active = (*active_column)[next_row].get(); + if (active == 0) + info.need_inactive_parts = true; + } + + info.storage = storages.at(std::make_pair(info.database, info.table)); + + /// For table not to be dropped and set of columns to remain constant. 
+ info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); + + if (info.table_lock == nullptr) + { + // Table was dropped while acquiring the lock, skipping table + continue; + } + + info.engine = info.storage->getName(); + + info.data = dynamic_cast(info.storage.get()); + if (!info.data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); + + return info; + } + + return {}; +} Pipe StorageSystemPartsBase::read( const Names & column_names, @@ -204,7 +252,7 @@ Pipe StorageSystemPartsBase::read( { bool has_state_column = hasStateColumn(column_names, storage_snapshot); - auto stream = getStoragesInfoStream(query_info, context); + StoragesInfoStream stream(query_info, context); /// Create the result. Block sample = storage_snapshot->metadata->getSampleBlock(); @@ -215,7 +263,7 @@ Pipe StorageSystemPartsBase::read( if (has_state_column) res_columns.push_back(ColumnString::create()); - while (StoragesInfo info = stream->next()) + while (StoragesInfo info = stream.next()) { processNextStorage(context, res_columns, columns_mask, info, has_state_column); } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index fd20b0756b2..c3d2e64b303 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -8,11 +8,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - class Context; struct StoragesInfo @@ -34,64 +29,13 @@ struct StoragesInfo }; /** A helper class that enumerates the storages that match given query. */ -class StoragesInfoStreamBase +class StoragesInfoStream { public: - StoragesInfoStreamBase(ContextPtr context) - : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()), next_row(0), rows(0) - {} + StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); + StoragesInfo next(); - StoragesInfo next() - { - while (next_row < rows) - { - StoragesInfo info; - - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - UUID storage_uuid = (*storage_uuid_column)[next_row].get(); - - auto is_same_table = [&storage_uuid, this] (size_t row) -> bool - { - return (*storage_uuid_column)[row].get() == storage_uuid; - }; - - /// We may have two rows per table which differ in 'active' value. - /// If rows with 'active = 0' were not filtered out, this means we - /// must collect the inactive parts. Remember this fact in StoragesInfo. - for (; next_row < rows && is_same_table(next_row); ++next_row) - { - const auto active = (*active_column)[next_row].get(); - if (active == 0) - info.need_inactive_parts = true; - } - - info.storage = storages.at(storage_uuid); - - if (needsLock) - { - /// For table not to be dropped and set of columns to remain constant. 
- info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); - if (info.table_lock == nullptr) - { - // Table was dropped while acquiring the lock, skipping table - continue; - } - } - - info.engine = info.storage->getName(); - - info.data = dynamic_cast(info.storage.get()); - if (!info.data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); - - return info; - } - - return {}; - } - -protected: +private: String query_id; Settings settings; @@ -99,22 +43,12 @@ protected: ColumnPtr database_column; ColumnPtr table_column; ColumnPtr active_column; - ColumnPtr storage_uuid_column; size_t next_row; size_t rows; - using StoragesMap = std::unordered_map; + using StoragesMap = std::map, StoragePtr>; StoragesMap storages; - - bool needsLock = true; -}; - - -class StoragesInfoStream : public StoragesInfoStreamBase -{ -public: - StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. @@ -143,11 +77,6 @@ protected: StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); - virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - { - return std::make_unique(query_info, context); - } - virtual void processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; }; diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 57f37c8899e..ffe74f1c94a 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -82,7 +82,6 @@ #include #include #include -#include #include #include #include @@ -157,7 +156,6 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "backups"); attach(context, system_database, "schema_inference_cache"); attach(context, system_database, "dropped_tables"); - attach(context, system_database, "dropped_tables_parts"); attach(context, system_database, "scheduler"); #if defined(__ELF__) && !defined(OS_FREEBSD) attach(context, system_database, "symbols"); From 276f040dd05d4e7bdfe0fe3752d0b86b4b4b3521 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 19 Dec 2023 10:01:37 +0000 Subject: [PATCH 109/137] ci fix https://github.com/ClickHouse/ClickHouse/actions/runs/7256640256/job/19769624432?pr=58015#step:8:5466 ``` Dec 19 03:09:41 /build/src/IO/AIO.cpp:140:15: error: use of undeclared identifier 'ErrnoException'; did you mean 'DB::ErrnoException'? 
Dec 19 03:09:41 140 | throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed"); ``` --- tests/ci/ci.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index bed12d54fe3..42b172f049d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -511,7 +511,14 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" matches = re.findall(pattern, message) - return matches + res = [ + match + for match in matches + if match == "#no-merge-commit" + or match.startswith("#job_") + or match.startswith("#job-") + ] + return res def main() -> int: From 3632688e55b2424dd2518c258b185a033f0fac19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 11:10:11 +0100 Subject: [PATCH 110/137] Fix style --- src/Functions/FunctionsHashing.h | 1 - src/Functions/FunctionsStringHashFixedString.cpp | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 8fb21cd1ad4..d0edd34e657 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -62,7 +62,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_COLUMN; - extern const int SUPPORT_IS_DISABLED; } namespace impl diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp index f7134953d52..9b613b4026e 100644 --- a/src/Functions/FunctionsStringHashFixedString.cpp +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -33,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -434,8 +435,6 @@ REGISTER_FUNCTION(HashFixedStrings) .categories{"Hash"}}, FunctionFactory::CaseSensitive); # endif - +} #endif } - -} From 8bef92c92adf0ffac2a9960b3999f5881c71a833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 12:33:01 +0100 Subject: [PATCH 111/137] Fix clang-tidy --- src/Common/ArrayCache.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index 47b91ff4eef..bbcf1a55bed 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -179,13 +179,22 @@ private: { ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) - throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size)); + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size)); } ~Chunk() { if (ptr && 0 != munmap(ptr, size)) - throw ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size)); + { + try + { + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size)); + } + catch (DB::ErrnoException &) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } } Chunk(Chunk && other) noexcept : ptr(other.ptr), size(other.size) From e09c68db32391af68a15350b6df04addf7434a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 13:18:28 +0100 Subject: [PATCH 112/137] Remove fixed tests from analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index b0e611fa77b..735094df78b 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -2,7 +2,6 @@ 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation -01064_incremental_streaming_from_2_src_with_feedback 01083_expressions_in_engine_arguments 01155_rename_move_materialized_view 01214_test_storage_merge_aliases_with_where @@ -20,9 +19,7 @@ 01761_cast_to_enum_nullable 01925_join_materialized_columns 01925_test_storage_merge_aliases -01947_mv_subquery 01952_optimize_distributed_group_by_sharding_key -02139_MV_with_scalar_subquery 02174_cte_scalar_cache_mv 02352_grouby_shadows_arg 02354_annoy @@ -36,7 +33,6 @@ 02404_memory_bound_merging 02725_agg_projection_resprect_PK 02763_row_policy_storage_merge_alias -02765_parallel_replicas_final_modifier 02784_parallel_replicas_automatic_decision_join 02818_parameterized_view_with_cte_multiple_usage 02815_range_dict_no_direct_join From 8ab6564538a06d45545ade8c7f37da39587012ac Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Dec 2023 13:30:59 +0100 Subject: [PATCH 113/137] Revert "Fix bug window functions: revert #39631" --- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Planner/Planner.cpp | 1 + src/Processors/QueryPlan/SortingStep.cpp | 78 ++++++- src/Processors/QueryPlan/SortingStep.h | 30 ++- src/Processors/QueryPlan/WindowStep.cpp | 3 +- .../ScatterByPartitionTransform.cpp | 129 +++++++++++ .../Transforms/ScatterByPartitionTransform.h | 34 +++ ...568_window_functions_distributed.reference | 29 +++ .../01568_window_functions_distributed.sql | 4 + .../02884_parallel_window_functions.reference | 100 ++++++++ .../02884_parallel_window_functions.sql | 119 ++++++++++ ...2_window_functions_logical_error.reference | 216 ------------------ .../02942_window_functions_logical_error.sql | 158 ------------- 13 files changed, 521 insertions(+), 381 deletions(-) create mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.cpp create mode 100644 src/Processors/Transforms/ScatterByPartitionTransform.h create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.reference create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions.sql delete mode 100644 tests/queries/0_stateless/02942_window_functions_logical_error.reference delete mode 100644 tests/queries/0_stateless/02942_window_functions_logical_error.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 67245438156..4f4e96a9be7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2942,6 +2942,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window.full_sort_description, + window.partition_by, 0 /* LIMIT */, sort_settings, settings.optimize_sorting_by_input_stream_properties); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2ab88491357..95c61f8d011 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -915,6 +915,7 @@ void addWindowSteps(QueryPlan & query_plan, auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), window_description.full_sort_description, + window_description.partition_by, 0 /*limit*/, sort_settings, settings.optimize_sorting_by_input_stream_properties); diff --git a/src/Processors/QueryPlan/SortingStep.cpp 
b/src/Processors/QueryPlan/SortingStep.cpp index 55ce763575e..641b9036d4c 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -9,6 +10,8 @@ #include #include +#include +#include namespace CurrentMetrics { @@ -76,6 +79,21 @@ SortingStep::SortingStep( output_stream->sort_scope = DataStream::SortScope::Global; } +SortingStep::SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + const SortDescription & partition_by_description_, + UInt64 limit_, + const Settings & settings_, + bool optimize_sorting_by_input_stream_properties_) + : SortingStep(input_stream, description_, limit_, settings_, optimize_sorting_by_input_stream_properties_) +{ + partition_by_description = partition_by_description_; + + output_stream->sort_description = result_description; + output_stream->sort_scope = DataStream::SortScope::Stream; +} + SortingStep::SortingStep( const DataStream & input_stream_, SortDescription prefix_description_, @@ -117,7 +135,11 @@ void SortingStep::updateOutputStream() { output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); output_stream->sort_description = result_description; - output_stream->sort_scope = DataStream::SortScope::Global; + + if (partition_by_description.empty()) + output_stream->sort_scope = DataStream::SortScope::Global; + else + output_stream->sort_scope = DataStream::SortScope::Stream; } void SortingStep::updateLimit(size_t limit_) @@ -135,6 +157,55 @@ void SortingStep::convertToFinishSorting(SortDescription prefix_description_) prefix_description = std::move(prefix_description_); } +void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) +{ + size_t threads = pipeline.getNumThreads(); + size_t streams = pipeline.getNumStreams(); + + if (!partition_by_description.empty() && threads > 1) + { + Block stream_header = pipeline.getHeader(); + + ColumnNumbers key_columns; + key_columns.reserve(partition_by_description.size()); + for (auto & col : partition_by_description) + { + key_columns.push_back(stream_header.getPositionByName(col.column_name)); + } + + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (auto * port : ports) + { + auto scatter = std::make_shared(stream_header, threads, key_columns); + connect(*port, scatter->getInputs().front()); + processors.push_back(scatter); + } + return processors; + }); + + if (streams > 1) + { + pipeline.transform([&](OutputPortRawPtrs ports) + { + Processors processors; + for (size_t i = 0; i < threads; ++i) + { + size_t output_it = i; + auto resize = std::make_shared(stream_header, streams, 1); + auto & inputs = resize->getInputs(); + + for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it) + connect(*ports[output_it], *input_it); + processors.push_back(resize); + } + return processors; + }); + } + } +} + void SortingStep::finishSorting( QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, const UInt64 limit_) { @@ -260,10 +331,12 @@ void SortingStep::fullSortStreams( void SortingStep::fullSort( QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort) { + scatterByPartitionIfNeeded(pipeline); + fullSortStreams(pipeline, sort_settings, result_sort_desc, limit_, skip_partial_sort); /// If there are several streams, then 
we merge them into one - if (pipeline.getNumStreams() > 1) + if (pipeline.getNumStreams() > 1 && (partition_by_description.empty() || pipeline.getNumThreads() == 1)) { auto transform = std::make_shared( pipeline.getHeader(), @@ -295,6 +368,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build { bool need_finish_sorting = (prefix_description.size() < result_description.size()); mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); + if (need_finish_sorting) { finishSorting(pipeline, prefix_description, result_description, limit); diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 371a24ac6f2..52f48f66a32 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -40,6 +40,15 @@ public: const Settings & settings_, bool optimize_sorting_by_input_stream_properties_); + /// Full with partitioning + SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + const SortDescription & partition_by_description_, + UInt64 limit_, + const Settings & settings_, + bool optimize_sorting_by_input_stream_properties_); + /// FinishSorting SortingStep( const DataStream & input_stream_, @@ -83,14 +92,24 @@ public: bool skip_partial_sort = false); private: + void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline); void updateOutputStream() override; - static void - mergeSorting(QueryPipelineBuilder & pipeline, const Settings & sort_settings, const SortDescription & result_sort_desc, UInt64 limit_); + static void mergeSorting( + QueryPipelineBuilder & pipeline, + const Settings & sort_settings, + const SortDescription & result_sort_desc, + UInt64 limit_); - void mergingSorted(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, UInt64 limit_); + void mergingSorted( + QueryPipelineBuilder & pipeline, + const SortDescription & result_sort_desc, + UInt64 limit_); void finishSorting( - QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, UInt64 limit_); + QueryPipelineBuilder & pipeline, + const SortDescription & input_sort_desc, + const SortDescription & result_sort_desc, + UInt64 limit_); void fullSort( QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, @@ -101,6 +120,9 @@ private: SortDescription prefix_description; const SortDescription result_description; + + SortDescription partition_by_description; + UInt64 limit; bool always_read_till_end = false; diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 9c68a4b73d1..bb4f429d626 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -67,7 +67,8 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ // This resize is needed for cases such as `over ()` when we don't have a // sort node, and the input might have multiple streams. The sort node would // have resized it. 
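    // With partition-wise scattering the sorting step intentionally keeps several
    // sorted streams, so the pipeline is collapsed to a single stream only when
    // there is no full sort description.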
- pipeline.resize(1); + if (window_description.full_sort_description.empty()) + pipeline.resize(1); pipeline.addSimpleTransform( [&](const Block & /*header*/) diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp new file mode 100644 index 00000000000..6e3cdc0fda1 --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -0,0 +1,129 @@ +#include + +#include +#include + +namespace DB +{ +ScatterByPartitionTransform::ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_) + : IProcessor(InputPorts{header}, OutputPorts{output_size_, header}) + , output_size(output_size_) + , key_columns(std::move(key_columns_)) + , hash(0) +{} + +IProcessor::Status ScatterByPartitionTransform::prepare() +{ + auto & input = getInputs().front(); + + /// Check all outputs are finished or ready to get data. + + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) + { + input.close(); + return Status::Finished; + } + + if (!all_outputs_processed) + { + auto output_it = outputs.begin(); + bool can_push = false; + for (size_t i = 0; i < output_size; ++i, ++output_it) + if (!was_output_processed[i] && output_it->canPush()) + can_push = true; + if (!can_push) + return Status::PortFull; + return Status::Ready; + } + /// Try get chunk from input. + + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + chunk = input.pull(); + has_data = true; + was_output_processed.assign(outputs.size(), false); + + return Status::Ready; +} + +void ScatterByPartitionTransform::work() +{ + if (all_outputs_processed) + generateOutputChunks(); + all_outputs_processed = true; + + size_t chunk_number = 0; + for (auto & output : outputs) + { + auto & was_processed = was_output_processed[chunk_number]; + auto & output_chunk = output_chunks[chunk_number]; + ++chunk_number; + + if (was_processed) + continue; + + if (output.isFinished()) + continue; + + if (!output.canPush()) + { + all_outputs_processed = false; + continue; + } + + output.push(std::move(output_chunk)); + was_processed = true; + } + + if (all_outputs_processed) + { + has_data = false; + output_chunks.clear(); + } +} + +void ScatterByPartitionTransform::generateOutputChunks() +{ + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + hash.reset(num_rows); + + for (const auto & column_number : key_columns) + columns[column_number]->updateWeakHash32(hash); + + const auto & hash_data = hash.getData(); + IColumn::Selector selector(num_rows); + + for (size_t row = 0; row < num_rows; ++row) + selector[row] = hash_data[row] % output_size; + + output_chunks.resize(output_size); + for (const auto & column : columns) + { + auto filtered_columns = column->scatter(output_size, selector); + for (size_t i = 0; i < output_size; ++i) + output_chunks[i].addColumn(std::move(filtered_columns[i])); + } +} + +} diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.h b/src/Processors/Transforms/ScatterByPartitionTransform.h new file mode 100644 index 00000000000..327f6dd62b4 --- /dev/null +++ b/src/Processors/Transforms/ScatterByPartitionTransform.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +struct ScatterByPartitionTransform : IProcessor +{ + 
ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_); + + String getName() const override { return "ScatterByPartitionTransform"; } + + Status prepare() override; + void work() override; + +private: + + void generateOutputChunks(); + + size_t output_size; + ColumnNumbers key_columns; + + bool has_data = false; + bool all_outputs_processed = true; + std::vector was_output_processed; + Chunk chunk; + + WeakHash32 hash; + Chunks output_chunks; +}; + +} diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference index 13ac0769a24..29ff2e7133c 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.reference +++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference @@ -22,6 +22,16 @@ select sum(number) over w as x, max(number) over w as y from t_01568 window w as 21 8 21 8 21 8 +select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p; +3 2 +3 2 +3 2 +12 5 +12 5 +12 5 +21 8 +21 8 +21 8 select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y; 6 2 6 2 @@ -41,6 +51,25 @@ select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1, 42 8 42 8 42 8 +select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y SETTINGS max_threads = 1; +6 2 +6 2 +6 2 +6 2 +6 2 +6 2 +24 5 +24 5 +24 5 +24 5 +24 5 +24 5 +42 8 +42 8 +42 8 +42 8 +42 8 +42 8 select distinct sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y; 6 2 24 5 diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql index 95072d6460f..ecce7b412ba 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.sql +++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql @@ -15,8 +15,12 @@ from numbers(9); select sum(number) over w as x, max(number) over w as y from t_01568 window w as (partition by p) order by x, y; +select sum(number) over w, max(number) over w from t_01568 window w as (partition by p) order by p; + select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y; +select sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y SETTINGS max_threads = 1; + select distinct sum(number) over w as x, max(number) over w as y from remote('127.0.0.{1,2}', '', t_01568) window w as (partition by p) order by x, y; -- window functions + aggregation w/shards diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference new file mode 100644 index 00000000000..bac15838dc2 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -0,0 +1,100 @@ +1 +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) 
AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10 +SETTINGS max_threads = 1; +0 2 0 +1 2 0 +2 2 0 +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; +0 2 0 +1 2 0 +2 2 0 diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql new file mode 100644 index 00000000000..3151b42f896 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -0,0 +1,119 @@ +CREATE TABLE window_funtion_threading +Engine = MergeTree +ORDER BY (ac, nw) +AS SELECT + toUInt64(toFloat32(number % 2) % 20000000) as ac, + toFloat32(1) as wg, + toUInt16(toFloat32(number % 3) % 400) as nw +FROM numbers_mt(10000000); + +SELECT count() FROM (EXPLAIN PIPELINE SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10) where explain ilike '%ScatterByPartitionTransform%' SETTINGS max_threads = 4; + +-- { echoOn } + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + GROUP BY ac, nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10 +SETTINGS max_threads = 1; + +SELECT + nw, + sum(WR) AS R, + sumIf(WR, uniq_rows = 1) AS UNR +FROM +( + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 0 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 1 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 2 + GROUP BY + ac, + nw + UNION ALL + SELECT + uniq(nw) OVER (PARTITION BY ac) AS uniq_rows, + AVG(wg) AS WR, + ac, + nw + FROM window_funtion_threading + WHERE (ac % 4) = 3 + GROUP BY + ac, + nw +) +GROUP BY nw +ORDER BY nw ASC, R DESC +LIMIT 10; diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.reference b/tests/queries/0_stateless/02942_window_functions_logical_error.reference deleted file mode 100644 index 
73f8351d9df..00000000000 --- a/tests/queries/0_stateless/02942_window_functions_logical_error.reference +++ /dev/null @@ -1,216 +0,0 @@ -1 901 19 -1 911 19 -1 921 19 -1 931 19 -1 941 19 -1 951 20 -1 961 20 -1 971 20 -1 981 20 -1 991 20 -2 902 19 -2 912 19 -2 922 19 -2 932 19 -2 942 19 -2 952 20 -2 962 20 -2 972 20 -2 982 20 -2 992 20 -3 903 19 -3 913 19 -3 923 19 -3 933 19 -3 943 19 -3 953 20 -3 963 20 -3 973 20 -3 983 20 -3 993 20 -4 904 19 -4 914 19 -4 924 19 -4 934 19 -4 944 19 -4 954 20 -4 964 20 -4 974 20 -4 984 20 -4 994 20 -5 905 19 -5 915 19 -5 925 19 -5 935 19 -5 945 19 -5 955 20 -5 965 20 -5 975 20 -5 985 20 -5 995 20 -6 906 19 -6 916 19 -6 926 19 -6 936 19 -6 946 19 -6 956 20 -6 966 20 -6 976 20 -6 986 20 -6 996 20 -7 907 19 -7 917 19 -7 927 19 -7 937 19 -7 947 19 -7 957 20 -7 967 20 -7 977 20 -7 987 20 -7 997 20 -8 908 19 -8 918 19 -8 928 19 -8 938 19 -8 948 19 -8 958 20 -8 968 20 -8 978 20 -8 988 20 -8 998 20 -9 909 19 -9 919 19 -9 929 19 -9 939 19 -9 949 19 -9 959 20 -9 969 20 -9 979 20 -9 989 20 -9 999 20 -1 1301 19 -1 1311 19 -1 1321 19 -1 1331 19 -1 1341 19 -1 1351 19 -1 1361 19 -1 1371 20 -1 1381 20 -1 1391 20 -1 1401 20 -1 1411 20 -1 1421 20 -1 1431 20 -2 1302 19 -2 1312 19 -2 1322 19 -2 1332 19 -2 1342 19 -2 1352 19 -2 1362 19 -2 1372 20 -2 1382 20 -2 1392 20 -2 1402 20 -2 1412 20 -2 1422 20 -2 1432 20 -3 1303 19 -3 1313 19 -3 1323 19 -3 1333 19 -3 1343 19 -3 1353 19 -3 1363 19 -3 1373 20 -3 1383 20 -3 1393 20 -3 1403 20 -3 1413 20 -3 1423 20 -3 1433 20 -4 1304 19 -4 1314 19 -4 1324 19 -4 1334 19 -4 1344 19 -4 1354 19 -4 1364 19 -4 1374 20 -4 1384 20 -4 1394 20 -4 1404 20 -4 1414 20 -4 1424 20 -4 1434 20 -5 1305 19 -5 1315 19 -5 1325 19 -5 1335 19 -5 1345 19 -5 1355 19 -5 1365 19 -5 1375 20 -5 1385 20 -5 1395 20 -5 1405 20 -5 1415 20 -5 1425 20 -5 1435 20 -6 1306 19 -6 1316 19 -6 1326 19 -6 1336 19 -6 1346 19 -6 1356 19 -6 1366 19 -6 1376 20 -6 1386 20 -6 1396 20 -6 1406 20 -6 1416 20 -6 1426 20 -6 1436 20 -7 1307 19 -7 1317 19 -7 1327 19 -7 1337 19 -7 1347 19 -7 1357 19 -7 1367 19 -7 1377 20 -7 1387 20 -7 1397 20 -7 1407 20 -7 1417 20 -7 1427 20 -7 1437 20 -8 1308 19 -8 1318 19 -8 1328 19 -8 1338 19 -8 1348 19 -8 1358 19 -8 1368 19 -8 1378 20 -8 1388 20 -8 1398 20 -8 1408 20 -8 1418 20 -8 1428 20 -8 1438 20 -9 1309 19 -9 1319 19 -9 1329 19 -9 1339 19 -9 1349 19 -9 1359 19 -9 1369 19 -9 1379 20 -9 1389 20 -9 1399 20 -9 1409 20 -9 1419 20 -9 1429 20 -9 1439 20 diff --git a/tests/queries/0_stateless/02942_window_functions_logical_error.sql b/tests/queries/0_stateless/02942_window_functions_logical_error.sql deleted file mode 100644 index 1e4371a134f..00000000000 --- a/tests/queries/0_stateless/02942_window_functions_logical_error.sql +++ /dev/null @@ -1,158 +0,0 @@ -DROP TABLE IF EXISTS posts; -DROP TABLE IF EXISTS post_metrics; - -CREATE TABLE IF NOT EXISTS posts -( - `page_id` LowCardinality(String), - `post_id` String CODEC(LZ4), - `host_id` UInt32 CODEC(T64, LZ4), - `path_id` UInt32, - `created` DateTime CODEC(T64, LZ4), - `as_of` DateTime CODEC(T64, LZ4) -) -ENGINE = ReplacingMergeTree(as_of) -PARTITION BY toStartOfMonth(created) -ORDER BY (page_id, post_id) -TTL created + toIntervalMonth(26); - - -INSERT INTO posts SELECT - repeat('a', (number % 10) + 1), - toString(number), - number % 10, - number, - now() - toIntervalMinute(number), - now() -FROM numbers(1000); - - -CREATE TABLE IF NOT EXISTS post_metrics -( - `page_id` LowCardinality(String), - `post_id` String CODEC(LZ4), - `created` DateTime CODEC(T64, LZ4), - `impressions` UInt32 CODEC(T64, LZ4), - `clicks` 
UInt32 CODEC(T64, LZ4), - `as_of` DateTime CODEC(T64, LZ4) -) -ENGINE = ReplacingMergeTree(as_of) -PARTITION BY toStartOfMonth(created) -ORDER BY (page_id, post_id) -TTL created + toIntervalMonth(26); - - -INSERT INTO post_metrics SELECT - repeat('a', (number % 10) + 1), - toString(number), - now() - toIntervalMinute(number), - number * 100, - number * 10, - now() -FROM numbers(1000); - - -SELECT - host_id, - path_id, - max(rank) AS rank -FROM -( - WITH - as_of_posts AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM posts - WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0) - ), - as_of_post_metrics AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM post_metrics - WHERE created >= subtractHours(now(), 24) - ) - SELECT - page_id, - post_id, - host_id, - path_id, - impressions, - clicks, - ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank - FROM as_of_posts - GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num) - WHERE (row_num = 1) AND (impressions > 0) -) AS t -WHERE t.rank > 18 -GROUP BY - host_id, - path_id -ORDER BY host_id, path_id; - - -INSERT INTO posts SELECT - repeat('a', (number % 10) + 1), - toString(number), - number % 10, - number, - now() - toIntervalMinute(number), - now() -FROM numbers(100000); - - -INSERT INTO post_metrics SELECT - repeat('a', (number % 10) + 1), - toString(number), - now() - toIntervalMinute(number), - number * 100, - number * 10, - now() -FROM numbers(100000); - - -SELECT - host_id, - path_id, - max(rank) AS rank -FROM -( - WITH - as_of_posts AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM posts - WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0) - ), - as_of_post_metrics AS - ( - SELECT - *, - row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num - FROM post_metrics - WHERE created >= subtractHours(now(), 24) - ) - SELECT - page_id, - post_id, - host_id, - path_id, - impressions, - clicks, - ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank - FROM as_of_posts - GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num) - WHERE (row_num = 1) AND (impressions > 0) -) AS t -WHERE t.rank > 18 -GROUP BY - host_id, - path_id -ORDER BY host_id, path_id; - -DROP TABLE posts; -DROP TABLE post_metrics; From b4fec61814f2a57aab18f5a15b9bdff4fd703848 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 18 Dec 2023 20:50:58 +0100 Subject: [PATCH 114/137] fix style and black --- docker/test/stateful/run.sh | 4 ++-- tests/clickhouse-test | 37 +++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 82587efcb3d..806b57c4616 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -78,7 +78,7 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout_with_logging 120 service clickhouse-server start + timeout 120 service clickhouse-server start sleep 0.5 counter=$((counter + 1)) done @@ -163,7 +163,7 @@ function run_tests() } export -f run_tests -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout "$MAX_RUN_TIME" bash -c run_tests ||: echo "Files in current directory" ls -la 
./ diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1ba67a3b2f4..1df2bc8271f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -278,6 +278,7 @@ def need_retry(args, stdout, stderr, total_time): msg in stderr for msg in MESSAGES_TO_RETRY ) + def get_processlist_size(args): if args.replicated_database: return int( @@ -286,27 +287,26 @@ def get_processlist_size(args): """ SELECT count() - FROM + FROM FROM system.processes WHERE query NOT LIKE '%system.processes%' - FORMAT Vertical """, - ).strip() + ).strip() ) else: return int( - clickhouse_execute( + clickhouse_execute( args, """ - SELECT - count() - FROM system.processes - WHERE query NOT LIKE '%system.processes%' - FORMAT Vertical - """, + SELECT + count() + FROM system.processes + WHERE query NOT LIKE '%system.processes%' + """, ).strip() ) + def get_processlist_with_stacktraces(args): if args.replicated_database: return clickhouse_execute( @@ -356,7 +356,6 @@ def get_processlist_with_stacktraces(args): ) - def get_transactions_list(args): try: if args.replicated_database: @@ -2448,7 +2447,7 @@ def main(args): if args.hung_check: # Some queries may execute in background for some time after test was finished. This is normal. - print("Checking the hung queries: ", end='') + print("Checking the hung queries: ", end="") hung_count = 0 try: deadline = datetime.now() + timedelta(seconds=90) @@ -2457,16 +2456,18 @@ def main(args): if hung_count == 0: print(" done") break - print(". ", end='') + print(". ", end="") except Exception as e: print( colored( - "\nHung check failed. Failed to get processlist size: " + str(e), args, "red", attrs=["bold"] + "\nHung check failed. Failed to get processlist size: " + str(e), + args, + "red", + attrs=["bold"], ) ) exit_code.value = 1 - processlist = "" if hung_count > 0: try: @@ -2474,7 +2475,11 @@ def main(args): except Exception as e: print( colored( - "\nHung check failed. Failed to get processlist with stacktraces: " + str(e), args, "red", attrs=["bold"] + "\nHung check failed. 
Failed to get processlist with stacktraces: " + + str(e), + args, + "red", + attrs=["bold"], ) ) exit_code.value = 1 From 64247e9033f4896eab97be8a628dba0de9ab8063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 14:28:41 +0100 Subject: [PATCH 115/137] Try fix clang-tidy again --- src/Common/ArrayCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h index bbcf1a55bed..b6dde039227 100644 --- a/src/Common/ArrayCache.h +++ b/src/Common/ArrayCache.h @@ -192,7 +192,7 @@ private: } catch (DB::ErrnoException &) { - tryLogCurrentException(__PRETTY_FUNCTION__); + DB::tryLogCurrentException(__PRETTY_FUNCTION__); } } } From a375b1eab835b2a6be545886a7c686b87d41831c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 19 Dec 2023 16:41:51 +0100 Subject: [PATCH 116/137] Fix handling of unavailable replicas before first request happened (#57933) --- .../ParallelReplicasReadingCoordinator.cpp | 36 +++++++++++-------- .../ParallelReplicasReadingCoordinator.h | 5 +++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index a2765c071a2..95313654c0e 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -223,13 +223,16 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number) { - LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); + if (stats[replica_number].is_unavailable == false) + { + LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); - ++unavailable_replicas_count; - stats[replica_number].is_unavailable = true; + stats[replica_number].is_unavailable = true; + ++unavailable_replicas_count; - if (sent_initial_requests == replicas_count - unavailable_replicas_count) - finalizeReadingState(); + if (sent_initial_requests == replicas_count - unavailable_replicas_count) + finalizeReadingState(); + } } void DefaultCoordinator::finalizeReadingState() @@ -405,12 +408,13 @@ public: template void InOrderCoordinator::markReplicaAsUnavailable(size_t replica_number) { - LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); + if (stats[replica_number].is_unavailable == false) + { + LOG_DEBUG(log, "Replica number {} is unavailable", replica_number); - stats[replica_number].is_unavailable = true; - ++unavailable_replicas_count; - - /// There is nothing to do else. 
+ stats[replica_number].is_unavailable = true; + ++unavailable_replicas_count; + } } template @@ -577,11 +581,9 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica std::lock_guard lock(mutex); if (!pimpl) - { - initialize(); - } - - return pimpl->markReplicaAsUnavailable(replica_number); + unavailable_nodes_registered_before_initialization.push_back(replica_number); + else + pimpl->markReplicaAsUnavailable(replica_number); } void ParallelReplicasReadingCoordinator::initialize() @@ -598,8 +600,12 @@ void ParallelReplicasReadingCoordinator::initialize() pimpl = std::make_unique>(replicas_count); break; } + if (progress_callback) pimpl->setProgressCallback(std::move(progress_callback)); + + for (const auto replica : unavailable_nodes_registered_before_initialization) + pimpl->markReplicaAsUnavailable(replica); } ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_) {} diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 449421797ce..795d7462278 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -39,6 +39,11 @@ private: std::atomic initialized{false}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation + + /// To initialize `pimpl` we need to know the coordinator mode. We can know it only from initial announcement or regular request. + /// The problem is `markReplicaAsUnavailable` might be called before any of these requests happened. + /// In this case we will remember the numbers of unavailable replicas and apply this knowledge later on initialization. + std::vector unavailable_nodes_registered_before_initialization; }; using ParallelReplicasReadingCoordinatorPtr = std::shared_ptr; From a69005525b86fd8fd804d40b29bd643faa7d7143 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Dec 2023 16:15:39 +0000 Subject: [PATCH 117/137] Fix function ntile. 
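ntile kept its per-partition bookkeeping (bucket count, row counters) in mutable
members of the shared WindowFunctionNtile object. It is now a StatefulWindowFunction
and keeps that bookkeeping in an NtileState stored in the per-transform workspace,
so nothing on the shared function object has to be mutated while rows are processed,
which matters once the same window function can be evaluated by several transforms
at once (the parallel window execution reintroduced earlier in this series, exercised
by the new 02884_parallel_window_functions_bug test). A minimal sketch of the pattern
(the type and member names below are placeholders for illustration, not the actual
ClickHouse classes):

```cpp
#include <cstdint>

struct NtileState                      /// mutable bookkeeping, one instance per workspace
{
    uint64_t buckets = 0;
    uint64_t current_partition_rows = 0;
    uint64_t current_partition_inserted_row = 0;
};

struct Workspace
{
    NtileState ntile_state;
};

struct NtileFunction                   /// shared between transforms, so kept immutable
{
    void windowInsertResultInto(Workspace & workspace, uint64_t parameter_n) const
    {
        auto & state = workspace.ntile_state;  /// all mutation goes through the state
        if (state.buckets == 0)
            state.buckets = parameter_n;
        ++state.current_partition_rows;
        /// ... the bucket number for the current row would be derived from state here ...
    }
};
```

Keeping the state in the workspace is also what allows windowInsertResultInto to be
declared const in the following commit.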
--- src/Processors/Transforms/WindowTransform.cpp | 128 ++++++++++++------ ...84_parallel_window_functions_bug.reference | 0 .../02884_parallel_window_functions_bug.sql | 84 ++++++++++++ 3 files changed, 174 insertions(+), 38 deletions(-) create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions_bug.reference create mode 100644 tests/queries/0_stateless/02884_parallel_window_functions_bug.sql diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 7afc7a38aab..6cf874d24ea 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1585,17 +1585,21 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedSum(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedSum(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1670,7 +1674,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction @@ -1678,17 +1682,21 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedMax(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedMax(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1742,24 +1750,28 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFunction { static constexpr size_t ARGUMENT_TIME = 0; - WindowFunctionExponentialTimeDecayedCount(const std::string & name_, - const DataTypes & argument_types_, 
const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedCount(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1823,7 +1835,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunction @@ -1831,17 +1843,21 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc static constexpr size_t ARGUMENT_VALUE = 0; static constexpr size_t ARGUMENT_TIME = 1; - WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + static Float64 getDecayLength(const Array & parameters_, const std::string & name_) { if (parameters_.size() != 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_); } - decay_length = applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); + } + WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , decay_length(getDecayLength(parameters_, name_)) + { if (argument_types.size() != 2) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -1933,7 +1949,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc } private: - Float64 decay_length; + const Float64 decay_length; }; struct WindowFunctionRowNumber final : public WindowFunction @@ -1955,12 +1971,30 @@ struct WindowFunctionRowNumber final : public WindowFunction } }; +namespace +{ + struct NtileState + { + UInt64 buckets = 0; + RowNumber start_row; + UInt64 current_partition_rows = 0; + UInt64 current_partition_inserted_row = 0; + + void windowInsertResultInto( + const WindowTransform * transform, + size_t function_index, + const DataTypes & argument_types); + + static void checkWindowFrameType(const WindowTransform * transform); + }; +} + // Usage: ntile(n). n is the number of buckets. 
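 // Each row of a partition is assigned to one of the n buckets, of as equal size as
 // possible, numbered starting from 1.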
-struct WindowFunctionNtile final : public WindowFunction +struct WindowFunctionNtile final : public StatefulWindowFunction { WindowFunctionNtile(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (argument_types.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one argument", name_); @@ -1982,6 +2016,19 @@ struct WindowFunctionNtile final : public WindowFunction void windowInsertResultInto(const WindowTransform * transform, size_t function_index) override + { + const auto & workspace = transform->workspaces[function_index]; + auto & state = getState(workspace); + state.windowInsertResultInto(transform, function_index, argument_types); + } +}; + +namespace +{ + void NtileState::windowInsertResultInto( + const WindowTransform * transform, + size_t function_index, + const DataTypes & argument_types) { if (!buckets) [[unlikely]] { @@ -2072,13 +2119,8 @@ struct WindowFunctionNtile final : public WindowFunction bucket_num += 1; } } -private: - UInt64 buckets = 0; - RowNumber start_row; - UInt64 current_partition_rows = 0; - UInt64 current_partition_inserted_row = 0; - static void checkWindowFrameType(const WindowTransform * transform) + void NtileState::checkWindowFrameType(const WindowTransform * transform) { if (transform->order_by_indices.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); @@ -2093,7 +2135,7 @@ private: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); } } -}; +} // ClickHouse-specific variant of lag/lead that respects the window frame. 
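 // The offset row is looked up within the current frame only; when it falls outside
 // the frame, the default value is returned instead.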
template @@ -2298,16 +2340,18 @@ struct NonNegativeDerivativeState Float64 previous_timestamp = 0; }; -// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND]) -struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction +struct NonNegativeDerivativeParams { static constexpr size_t ARGUMENT_METRIC = 0; static constexpr size_t ARGUMENT_TIMESTAMP = 1; static constexpr size_t ARGUMENT_INTERVAL = 2; - WindowFunctionNonNegativeDerivative(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + Float64 interval_length = 1; + bool interval_specified = false; + Int64 ts_scale_multiplier = 0; + + NonNegativeDerivativeParams( + const std::string & name_, const DataTypes & argument_types, const Array & parameters) { if (!parameters.empty()) { @@ -2365,6 +2409,18 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction interval_specified = true; } } +}; + +// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND]) +struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction, public NonNegativeDerivativeParams +{ + using Params = NonNegativeDerivativeParams; + + WindowFunctionNonNegativeDerivative(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + , NonNegativeDerivativeParams(name, argument_types, parameters) + {} bool allocatesMemoryInArena() const override { return false; } @@ -2405,10 +2461,6 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction WindowFunctionHelpers::setValueToOutputColumn(transform, function_index, result >= 0 ? 
result : 0); } -private: - Float64 interval_length = 1; - bool interval_specified = false; - Int64 ts_scale_multiplier = 0; }; diff --git a/tests/queries/0_stateless/02884_parallel_window_functions_bug.reference b/tests/queries/0_stateless/02884_parallel_window_functions_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02884_parallel_window_functions_bug.sql b/tests/queries/0_stateless/02884_parallel_window_functions_bug.sql new file mode 100644 index 00000000000..84bc69e2310 --- /dev/null +++ b/tests/queries/0_stateless/02884_parallel_window_functions_bug.sql @@ -0,0 +1,84 @@ +CREATE TABLE IF NOT EXISTS posts +( + `page_id` LowCardinality(String), + `post_id` String CODEC(LZ4), + `host_id` UInt32 CODEC(T64, LZ4), + `path_id` UInt32, + `created` DateTime CODEC(T64, LZ4), + `as_of` DateTime CODEC(T64, LZ4) +) +ENGINE = ReplacingMergeTree(as_of) +PARTITION BY toStartOfMonth(created) +ORDER BY (page_id, post_id); + +CREATE TABLE IF NOT EXISTS post_metrics +( + `page_id` LowCardinality(String), + `post_id` String CODEC(LZ4), + `created` DateTime CODEC(T64, LZ4), + `impressions` UInt32 CODEC(T64, LZ4), + `clicks` UInt32 CODEC(T64, LZ4), + `as_of` DateTime CODEC(T64, LZ4) +) +ENGINE = ReplacingMergeTree(as_of) +PARTITION BY toStartOfMonth(created) +ORDER BY (page_id, post_id); + +INSERT INTO posts SELECT + repeat('a', (number % 10) + 1), + toString(number), + number % 10, + number, + now() - toIntervalMinute(number), + now() +FROM numbers(100000); + +INSERT INTO post_metrics SELECT + repeat('a', (number % 10) + 1), + toString(number), + now() - toIntervalMinute(number), + number * 100, + number * 10, + now() +FROM numbers(100000); + +SELECT + host_id, + path_id, + max(rank) AS rank +FROM +( + WITH + as_of_posts AS + ( + SELECT + *, + row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num + FROM posts + WHERE (created >= subtractHours(now(), 24)) AND (host_id > 0) + ), + as_of_post_metrics AS + ( + SELECT + *, + row_number() OVER (PARTITION BY (page_id, post_id) ORDER BY as_of DESC) AS row_num + FROM post_metrics + WHERE created >= subtractHours(now(), 24) + ) + SELECT + page_id, + post_id, + host_id, + path_id, + impressions, + clicks, + ntile(20) OVER (PARTITION BY page_id ORDER BY clicks ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rank + FROM as_of_posts + GLOBAL LEFT JOIN as_of_post_metrics USING (page_id, post_id, row_num) + WHERE (row_num = 1) AND (impressions > 0) +) AS t +WHERE t.rank > 18 +GROUP BY + host_id, + path_id +FORMAT Null; From ddb582c8b41ab89d5be29a988d1cb5f6147a97b0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Dec 2023 16:52:50 +0000 Subject: [PATCH 118/137] Make windowInsertResultInto constant --- src/Processors/Transforms/WindowTransform.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 6cf874d24ea..47b5b900400 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -67,7 +67,7 @@ public: // Must insert the result for current_row. 
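    // The method is const: any per-function mutable state lives in the transform's
    // workspace rather than in the window function object itself.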
virtual void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) = 0; + size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } }; @@ -1463,7 +1463,7 @@ struct WindowFunctionRank final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -1482,7 +1482,7 @@ struct WindowFunctionDenseRank final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -1561,7 +1561,7 @@ struct StatefulWindowFunction : public WindowFunction bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v; } - State & getState(const WindowFunctionWorkspace & workspace) + State & getState(const WindowFunctionWorkspace & workspace) const { return *static_cast(static_cast(workspace.aggregate_function_state.data())); } @@ -1626,7 +1626,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1723,7 +1723,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { Float64 result = std::numeric_limits::quiet_NaN(); @@ -1790,7 +1790,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1884,7 +1884,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = transform->workspaces[function_index]; auto & state = getState(workspace); @@ -1962,7 +1962,7 @@ struct WindowFunctionRowNumber final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { IColumn & to = *transform->blockAt(transform->current_row) .output_columns[function_index]; @@ -2015,7 +2015,7 @@ struct WindowFunctionNtile final : public StatefulWindowFunction } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & workspace = 
transform->workspaces[function_index]; auto & state = getState(workspace); @@ -2207,7 +2207,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); IColumn & to = *current_block.output_columns[function_index]; @@ -2297,7 +2297,7 @@ struct WindowFunctionNthValue final : public WindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); IColumn & to = *current_block.output_columns[function_index]; @@ -2425,7 +2425,7 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override + size_t function_index) const override { const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; From b4d0d63259a38d2ca8ed30379d384fdb32c591b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 18:03:57 +0100 Subject: [PATCH 119/137] Happy new year --- tests/queries/0_stateless/02833_local_with_dialect.reference | 1 - tests/queries/0_stateless/02833_local_with_dialect.sh | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02833_local_with_dialect.reference b/tests/queries/0_stateless/02833_local_with_dialect.reference index dbb67375997..573541ac970 100644 --- a/tests/queries/0_stateless/02833_local_with_dialect.reference +++ b/tests/queries/0_stateless/02833_local_with_dialect.reference @@ -1,2 +1 @@ 0 -[?2004h[?2004lBye. diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh index 012a6d91269..de009961cba 100755 --- a/tests/queries/0_stateless/02833_local_with_dialect.sh +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -6,4 +6,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh -echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive +# Remove last line since the good bye message changes depending on the date +echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive | head -n -1 From 9510b2ccfee2c58787ddeebf9bf86e30a5f40668 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Dec 2023 17:57:52 +0000 Subject: [PATCH 120/137] Follow up for 57691 --- src/Processors/Transforms/AggregatingTransform.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 355271e0c05..ecf8163a9d9 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -377,9 +377,7 @@ private: auto & output = outputs.front(); auto chunk = std::move(single_level_chunks.back()); single_level_chunks.pop_back(); - const auto has_rows = chunk.hasRows(); - if (has_rows) - output.push(std::move(chunk)); + output.push(std::move(chunk)); if (finished && single_level_chunks.empty()) { @@ -387,7 +385,7 @@ private: return Status::Finished; } - return has_rows ? Status::PortFull : Status::Ready; + return Status::PortFull; } /// Read all sources and try to push current bucket. @@ -466,7 +464,8 @@ private: auto block = params->aggregator.prepareBlockAndFillWithoutKey( *first, params->final, first->type != AggregatedDataVariants::Type::without_key); - single_level_chunks.emplace_back(convertToChunk(block)); + if (block.rows() > 0) + single_level_chunks.emplace_back(convertToChunk(block)); } } @@ -493,7 +492,8 @@ private: auto blocks = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); for (auto & block : blocks) - single_level_chunks.emplace_back(convertToChunk(block)); + if (block.rows() > 0) + single_level_chunks.emplace_back(convertToChunk(block)); finished = true; data.reset(); From 58f75c96790e54bce9e55e51c31f840b577d9c2b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Tue, 19 Dec 2023 10:17:15 -0800 Subject: [PATCH 121/137] Update src/Storages/StorageFuzzJSON.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Storages/StorageFuzzJSON.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 4d94fe3cdb0..87790dd2fdc 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -481,7 +481,7 @@ protected: { Columns columns; columns.reserve(block_header.columns()); - for (const auto& col : block_header) + for (const auto & col : block_header) { chassert(col.type->getTypeId() == TypeIndex::String); columns.emplace_back(createColumn()); From d8383377ea12ac32f88a8c19231687f73f90f6a1 Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 19 Dec 2023 20:14:47 +0100 Subject: [PATCH 122/137] always run ast_fuzz and sqllancer #no-merge-commit (#58049) --- tests/ci/ci.py | 8 +++++--- tests/ci/ci_config.py | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index bed12d54fe3..e3cac7c6ec5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -376,6 +376,8 @@ def _configure_jobs( if job_config.run_by_label in pr_labels: for batch in range(num_batches): # type: ignore batches_to_do.append(batch) + elif job_config.run_always: + 
batches_to_do.append(batch) else: # this job controlled by digest, add to todo if it's not successfully done before for batch in range(num_batches): # type: ignore @@ -400,10 +402,10 @@ def _configure_jobs( for token in commit_tokens if token.startswith("#job_") ] - assert any( - len(x) > 1 for x in requested_jobs - ), f"Invalid job names requested [{requested_jobs}]" if requested_jobs: + assert any( + len(x) > 1 for x in requested_jobs + ), f"Invalid job names requested [{requested_jobs}]" jobs_to_do_requested = [] for job in requested_jobs: job_with_parents = CI_CONFIG.get_job_with_parents(job) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index f76aedac80b..8bf9c62a17b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -37,6 +37,7 @@ class JobConfig: timeout: Optional[int] = None num_batches: int = 1 run_by_label: str = "" + run_always: bool = False @dataclass @@ -145,12 +146,11 @@ integration_check_digest = DigestConfig( "clickhouse/postgresql-java-client", ], ) -# FIXME: which tests are AST_FUZZER_TEST? just python? -# FIXME: should ast fuzzer test be non-skipable? + ast_fuzzer_check_digest = DigestConfig( - include_paths=["./tests/ci/ast_fuzzer_check.py"], - exclude_files=[".md"], - docker=["clickhouse/fuzzer"], + # include_paths=["./tests/ci/ast_fuzzer_check.py"], + # exclude_files=[".md"], + # docker=["clickhouse/fuzzer"], ) unit_check_digest = DigestConfig( include_paths=["./tests/ci/unit_tests_check.py"], @@ -166,9 +166,9 @@ perf_check_digest = DigestConfig( docker=["clickhouse/performance-comparison"], ) sqllancer_check_digest = DigestConfig( - include_paths=["./tests/ci/sqlancer_check.py"], - exclude_files=[".md"], - docker=["clickhouse/sqlancer-test"], + # include_paths=["./tests/ci/sqlancer_check.py"], + # exclude_files=[".md"], + # docker=["clickhouse/sqlancer-test"], ) sqllogic_check_digest = DigestConfig( include_paths=["./tests/ci/sqllogic_test.py"], @@ -226,6 +226,7 @@ upgrade_test_common_params = { astfuzzer_test_common_params = { "digest": ast_fuzzer_check_digest, "run_command": "ast_fuzzer_check.py", + "run_always": True, } integration_test_common_params = { "digest": integration_check_digest, @@ -242,6 +243,7 @@ perf_test_common_params = { sqllancer_test_common_params = { "digest": sqllancer_check_digest, "run_command": "sqlancer_check.py", + "run_always": True, } sqllogic_test_params = { "digest": sqllogic_check_digest, @@ -609,9 +611,7 @@ CI_CONFIG = CiConfig( "Style check": TestConfig( "", job_config=JobConfig( - digest=DigestConfig( - include_paths=["."], exclude_dirs=[".git", "__pycache__"] - ) + run_always=True, ), ), "tests bugfix validate check": TestConfig( From 7c281d9a6fbf95f4b2795bd0e24315aa5e4a726b Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Tue, 19 Dec 2023 23:16:03 +0100 Subject: [PATCH 123/137] Allow max_size_to_drop settings in query time (#57452) --- docs/en/operations/settings/settings.md | 22 +++++++++++++ src/Core/Settings.h | 4 +-- src/Interpreters/Context.cpp | 11 ++++++- src/Interpreters/Context.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 20 ++++++++++-- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/StorageMergeTree.cpp | 10 +++++- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++- ...932_query_settings_max_size_drop.reference | 0 .../02932_query_settings_max_size_drop.sql | 31 +++++++++++++++++++ 10 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02932_query_settings_max_size_drop.reference create mode 100644 
tests/queries/0_stateless/02932_query_settings_max_size_drop.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index dc46a3f0dcd..b9e7c37485f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5134,3 +5134,25 @@ When set to `true` than for all s3 requests first two attempts are made with low When set to `false` than all attempts are made with identical timeouts. Default value: `true`. + +## max_partition_size_to_drop + +Restriction on dropping partitions in query time. + +Default value: 50 GB. +The value 0 means that you can drop partitions without any restrictions. + +:::note +This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop) +::: + +## max_table_size_to_drop + +Restriction on deleting tables in query time. + +Default value: 50 GB. +The value 0 means that you can delete all tables without any restrictions. + +:::note +This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) +::: \ No newline at end of file diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7e50a81ada8..b75004a3396 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -528,8 +528,8 @@ class IColumn; M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \ M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \ M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \ - M(UInt64, max_table_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, max_partition_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \ + M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ + M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7539a11f25e..633bca644a1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4053,7 +4053,8 @@ void Context::checkCanBeDropped(const String & database, const String & table, c "2. File '{}' intended to force DROP {}\n" "How to fix this:\n" "1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n" - "2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n" + "2. Either pass a bigger (or set to zero) max_[table/partition]_size_to_drop through query settings\n" + "3. 
Either create forcing file {} and make sure that ClickHouse has write permission for it.\n" "Example:\nsudo touch '{}' && sudo chmod 666 '{}'", backQuoteIfNeed(database), backQuoteIfNeed(table), size_str, max_size_to_drop_str, @@ -4081,6 +4082,10 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab checkCanBeDropped(database, table, table_size, max_table_size_to_drop); } +void Context::checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const +{ + checkCanBeDropped(database, table, table_size, max_table_size_to_drop); +} void Context::setMaxPartitionSizeToDrop(size_t max_size) { @@ -4100,6 +4105,10 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } +void Context::checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const +{ + checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); +} InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, const std::optional max_parsing_threads) const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 74567e54c25..a844c0aaa7e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1084,11 +1084,13 @@ public: void setMaxTableSizeToDrop(size_t max_size); size_t getMaxTableSizeToDrop() const; void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const; + void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const; /// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check) void setMaxPartitionSizeToDrop(size_t max_size); size_t getMaxPartitionSizeToDrop() const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const; + void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const; /// Lets you select the compression codec according to the conditions described in the configuration file. 
std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d97f337c9c9..0ddeb0a6828 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4835,10 +4835,18 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context partition_size += part->getBytesOnDisk(); auto table_id = getStorageID(); + + const auto & query_settings = local_context->getSettingsRef(); + if (query_settings.max_partition_size_to_drop.changed) + { + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size, query_settings.max_partition_size_to_drop); + return; + } + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size); } -void MergeTreeData::checkPartCanBeDropped(const String & part_name) +void MergeTreeData::checkPartCanBeDropped(const String & part_name, ContextPtr local_context) { if (!supportsReplication() && isStaticStorage()) return; @@ -4848,6 +4856,14 @@ void MergeTreeData::checkPartCanBeDropped(const String & part_name) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name); auto table_id = getStorageID(); + + const auto & query_settings = local_context->getSettingsRef(); + if (query_settings.max_partition_size_to_drop.changed) + { + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk(), query_settings.max_partition_size_to_drop); + return; + } + getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk()); } @@ -5035,7 +5051,7 @@ Pipe MergeTreeData::alterPartition( if (command.part) { auto part_name = command.partition->as().value.safeGet(); - checkPartCanBeDropped(part_name); + checkPartCanBeDropped(part_name, query_context); dropPart(part_name, command.detach, query_context); } else diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index fc1d9085527..c69c7aaba3d 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -795,7 +795,7 @@ public: /// We do not use mutex because it is not very important that the size could change during the operation. 
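As a usage sketch of the query-level override this patch introduces (mirroring the bundled test): the table name below is hypothetical, and a value of `0` disables the size check for that single statement only.

```sql
-- Relax the per-partition limit for one ALTER only (unpartitioned table, as in the test).
ALTER TABLE big_events DROP PARTITION tuple() SETTINGS max_partition_size_to_drop = 0;

-- Relax the per-table limit for one DROP only.
DROP TABLE big_events SETTINGS max_table_size_to_drop = 0;
```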
void checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context); - void checkPartCanBeDropped(const String & part_name); + void checkPartCanBeDropped(const String & part_name, ContextPtr local_context); Pipe alterPartition( const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index eb8c52f8936..1abb1a51361 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -280,12 +280,20 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met *this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); } -void StorageMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const +void StorageMergeTree::checkTableCanBeDropped(ContextPtr query_context) const { if (!supportsReplication() && isStaticStorage()) return; auto table_id = getStorageID(); + + const auto & query_settings = query_context->getSettingsRef(); + if (query_settings.max_table_size_to_drop.changed) + { + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes(), query_settings.max_table_size_to_drop); + return; + } + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 64359ddd299..dbfa88f077e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6423,9 +6423,17 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( } -void StorageReplicatedMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const +void StorageReplicatedMergeTree::checkTableCanBeDropped(ContextPtr query_context) const { auto table_id = getStorageID(); + + const auto & query_settings = query_context->getSettingsRef(); + if (query_settings.max_table_size_to_drop.changed) + { + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes(), query_settings.max_table_size_to_drop); + return; + } + getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } diff --git a/tests/queries/0_stateless/02932_query_settings_max_size_drop.reference b/tests/queries/0_stateless/02932_query_settings_max_size_drop.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql b/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql new file mode 100644 index 00000000000..1685861bd2e --- /dev/null +++ b/tests/queries/0_stateless/02932_query_settings_max_size_drop.sql @@ -0,0 +1,31 @@ +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +DROP TABLE test_max_size_drop SETTINGS max_table_size_to_drop = 1; -- { serverError 359 } +DROP TABLE test_max_size_drop; + +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +ALTER TABLE test_max_size_drop DROP PARTITION tuple() SETTINGS max_partition_size_to_drop = 1; -- { serverError 359 } +ALTER TABLE test_max_size_drop DROP PARTITION tuple(); +DROP TABLE test_max_size_drop; + +CREATE TABLE test_max_size_drop +Engine = MergeTree() +ORDER BY number +AS SELECT number +FROM numbers(1000) +; + +ALTER TABLE test_max_size_drop 
DROP PART 'all_1_1_0' SETTINGS max_partition_size_to_drop = 1; -- { serverError 359 } +ALTER TABLE test_max_size_drop DROP PART 'all_1_1_0'; +DROP TABLE test_max_size_drop; From ccff19826510c9564ac9954499fb467a5004df90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 19 Dec 2023 23:21:19 +0100 Subject: [PATCH 124/137] Rename canUseParallelReplicas to canUseTaskBasedParallelReplicas (#58025) --- src/Interpreters/ClusterProxy/executeQuery.cpp | 4 ++-- src/Interpreters/Context.cpp | 6 +++--- src/Interpreters/Context.h | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 5 +---- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/Planner.cpp | 4 ++-- src/Processors/QueryPlan/ReadFromRemote.cpp | 4 ++-- 7 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3a634f08b83..549eadcebd2 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -135,7 +135,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, } /// disable parallel replicas if cluster contains only shards with 1 replica - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) { bool disable_parallel_replicas = true; for (const auto & shard : cluster.getShardsInfo()) @@ -265,7 +265,7 @@ void executeQuery( // decide for each shard if parallel reading from replicas should be enabled // according to settings and number of replicas declared per shard const auto & addresses = cluster->getShardsAddresses().at(i); - bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas(); + bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas(); stream_factory.createForShard( shard_info, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 633bca644a1..589d03cc074 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5041,7 +5041,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const return SAMPLE_KEY; } -bool Context::canUseParallelReplicas() const +bool Context::canUseTaskBasedParallelReplicas() const { const auto & settings_ref = getSettingsRef(); return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1; @@ -5049,12 +5049,12 @@ bool Context::canUseParallelReplicas() const bool Context::canUseParallelReplicasOnInitiator() const { - return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator; + return canUseTaskBasedParallelReplicas() && !getClientInfo().collaborate_with_initiator; } bool Context::canUseParallelReplicasOnFollower() const { - return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator; + return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index a844c0aaa7e..39d2212ce80 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1234,7 +1234,7 @@ public: WriteSettings getWriteSettings() const; /** There are multiple conditions that have to be met to be able to use parallel replicas */ - bool canUseParallelReplicas() const; + bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() 
const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3b389dcf61e..4f605344dd5 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -858,11 +858,8 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const bool ExpressionAnalyzer::isRemoteStorage() const { - const Settings & csettings = getContext()->getSettingsRef(); // Consider any storage used in parallel replicas as remote, so the query is executed in multiple servers - const bool enable_parallel_processing_of_joins - = csettings.max_parallel_replicas > 1 && csettings.allow_experimental_parallel_reading_from_replicas > 0; - return syntax->is_remote_storage || enable_parallel_processing_of_joins; + return syntax->is_remote_storage || getContext()->canUseTaskBasedParallelReplicas(); } const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 67245438156..f2d5df61f72 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -481,7 +481,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Check support for FINAL for parallel replicas bool is_query_with_final = isQueryWithFinal(query_info); - if (is_query_with_final && settings.allow_experimental_parallel_reading_from_replicas > 0) + if (is_query_with_final && context->canUseTaskBasedParallelReplicas()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2ab88491357..10fbf651d06 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1340,7 +1340,7 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); - if (settings.allow_experimental_parallel_reading_from_replicas > 0) + if (query_context->canUseTaskBasedParallelReplicas()) { const auto & table_expression_nodes = planner_context->getTableExpressionNodeToData(); for (const auto & it : table_expression_nodes) @@ -1366,7 +1366,7 @@ void Planner::buildPlanForQueryNode() } } - if (settings.allow_experimental_parallel_reading_from_replicas > 0 || !settings.parallel_replicas_custom_key.value.empty()) + if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty()) { /// Check support for JOIN for parallel replicas with custom key if (planner_context->getTableExpressionNodeToData().size() > 1) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cd88f5cc93a..f1dff279792 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -236,7 +236,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) { if (context->getSettingsRef().cluster_for_parallel_replicas.changed) { @@ -258,7 +258,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); remote_query_executor->setLogger(log); - if (context->canUseParallelReplicas()) + if (context->canUseTaskBasedParallelReplicas()) 
{ // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. From 2dda0ccfb79babb504714cc625bd8be0cea05bc0 Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 20 Dec 2023 10:39:07 +1100 Subject: [PATCH 125/137] Updating query cache doc with security consideration --- docs/en/operations/query-cache.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index def0f48b968..f174d32ed14 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -29,6 +29,11 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy. +:::security consideration +The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed, meaning that if there are any alterations to the user's role or permissions between one cached query and the next query, the query result will not reflect these changes. We recommend using different users to distingush between different level of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. +::: + + ## Configuration Settings and Usage Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the From 82ffb570b08aa3beefc33a9dafbfe079ed2bd4db Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 20 Dec 2023 10:44:46 +1100 Subject: [PATCH 126/137] Remove newline --- docs/en/operations/query-cache.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index f174d32ed14..98895fc7b91 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -33,7 +33,6 @@ This reduces maintenance effort and avoids redundancy. The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed, meaning that if there are any alterations to the user's role or permissions between one cached query and the next query, the query result will not reflect these changes. We recommend using different users to distingush between different level of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. 
::: - ## Configuration Settings and Usage Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the From 4859a74b16aa23c673af8548dcdc9058f7a2e2c8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 20 Dec 2023 02:42:24 +0100 Subject: [PATCH 127/137] Correct values for randomization --- tests/clickhouse-test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f0167d2da8f..6d4a3e69e7e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -612,10 +612,10 @@ class SettingsRandomizer: "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), "max_bytes_before_external_sort": threshold_generator( - 1.0, 0.5, 1, 10 * 1024 * 1024 * 1024 + 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_external_group_by": threshold_generator( - 1.0, 0.5, 1, 10 * 1024 * 1024 * 1024 + 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), From d2d0c970aa1f064d6996044ceb4728818d611004 Mon Sep 17 00:00:00 2001 From: San Date: Wed, 20 Dec 2023 12:45:36 +1100 Subject: [PATCH 128/137] Update query-cache.md Fixing spelling and explain clearer. --- docs/en/operations/query-cache.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 98895fc7b91..781d1f9bcd5 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -30,7 +30,7 @@ the same caching logic and configuration is often duplicated. With ClickHouse's This reduces maintenance effort and avoids redundancy. :::security consideration -The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed, meaning that if there are any alterations to the user's role or permissions between one cached query and the next query, the query result will not reflect these changes. We recommend using different users to distingush between different level of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. +The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results. ::: ## Configuration Settings and Usage From 98c9f830d6db8310c38864756335b78d58368956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Dec 2023 10:16:47 +0100 Subject: [PATCH 129/137] Revert "Merge pull request #55710 from guoxiaolongzte/clickhouse-test-add-prinln-nowTime" This reverts commit efefad9e52227a2b2658d6c6194934bbfdaf5751, reversing changes made to 8f7852f82d60ea9578974776f9ce286b70e1e8cf. 
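As a concrete illustration of the per-query control mentioned in the surrounding documentation text, a minimal sketch; the table name is invented. Per the security note above, the cached entry is tied to the user who executed the query.

```sql
-- Opt a single query into the query cache.
SELECT count()
FROM web_events
WHERE event_date = today()
SETTINGS use_query_cache = true;
```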
--- tests/clickhouse-test | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 6d4a3e69e7e..c868d882490 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1773,7 +1773,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): proc_name = multiprocessing.current_process().name print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") - seria_num = 1 while True: if is_concurrent: case = queue.get(timeout=args.timeout * 1.1) @@ -1801,9 +1800,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " if not is_concurrent: sys.stdout.flush() - sys.stdout.write( - f"Case SN: {seria_num} | Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Case Name: {test_cace_name:72}" - ) + sys.stdout.write(f"{test_cace_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. @@ -1849,7 +1846,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): if failures_chain >= args.max_failures_chain: stop_tests() break - seria_num += 1 if failures_total > 0: print( From 4547e60c9d72bb692f66a9de081af06a0ce336d5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Dec 2023 21:40:57 +0000 Subject: [PATCH 130/137] Bump Azure to v1.4.0 --- contrib/azure | 2 +- contrib/azure-cmake/CMakeLists.txt | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/contrib/azure b/contrib/azure index 352ff0a61cb..5b2e51d5799 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 352ff0a61cb319ac1cc38c4058443ddf70147530 +Subproject commit 5b2e51d57998df0ef4f493c93f6a2caa012e7c91 diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index bb44c993e79..89530e74ebc 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -12,26 +12,20 @@ file(GLOB AZURE_SDK_CORE_SRC "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.hpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/winhttp/*.cpp" "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/private/*.hpp" ) file(GLOB AZURE_SDK_IDENTITY_SRC "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/private/*.hpp" ) file(GLOB AZURE_SDK_STORAGE_COMMON_SRC "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/private/*.cpp" ) file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp" - "${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.hpp" ) file(GLOB AZURE_SDK_UNIFIED_SRC From 7cf2a62c7e9cdf7381f8e3da05893fa0b1849253 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 20 Dec 2023 09:57:50 +0000 Subject: [PATCH 131/137] Bump Azure to 1.5.0 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 5b2e51d5799..309a64eb987 160000 --- a/contrib/azure +++ 
b/contrib/azure @@ -1 +1 @@ -Subproject commit 5b2e51d57998df0ef4f493c93f6a2caa012e7c91 +Subproject commit 309a64eb987dfa13d51b0f53f8a8b61cc46d8f43 From 4000342b9eb318e9d3fa474b5f4c566e1b12893c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 20 Dec 2023 11:13:53 +0000 Subject: [PATCH 132/137] Bump Azure to 1.6.0 --- contrib/azure | 2 +- contrib/boringssl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/azure b/contrib/azure index 309a64eb987..a852d81f92f 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 309a64eb987dfa13d51b0f53f8a8b61cc46d8f43 +Subproject commit a852d81f92f153e109de165ee08546741e3f2a68 diff --git a/contrib/boringssl b/contrib/boringssl index 8061ac62d67..aa6d2f865a2 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit 8061ac62d67953e61b793042e33baf1352e67510 +Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4 From cba28c9bd0702d9fc3ac1d1e48a119c19ce735eb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 20 Dec 2023 11:53:07 +0000 Subject: [PATCH 133/137] Update test --- tests/queries/0_stateless/02884_parallel_window_functions.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index 3151b42f896..c5ab013a198 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -1,9 +1,11 @@ +-- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug + CREATE TABLE window_funtion_threading Engine = MergeTree ORDER BY (ac, nw) AS SELECT toUInt64(toFloat32(number % 2) % 20000000) as ac, - toFloat32(1) as wg, + toFloat32(1) as wg, toUInt16(toFloat32(number % 3) % 400) as nw FROM numbers_mt(10000000); From 3beddc8d2d43acb1fd688daec6682b94775999e0 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 20 Dec 2023 13:46:21 +0100 Subject: [PATCH 134/137] fix typo --- .../02479_race_condition_between_insert_and_droppin_mv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh index 5d9844d5030..9ce4b459fce 100755 --- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh @@ -42,7 +42,7 @@ TIMEOUT=55 for i in {1..4} do - timeout $TIMEOUT bash -c drop_mv $i & + timeout $TIMEOUT bash -c "drop_mv $i" & done for i in {1..4} From d57ac6dfdb07883373b18464d39e6f390d4eb9df Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 20 Dec 2023 17:33:05 +0100 Subject: [PATCH 135/137] Set replica number to its position in cluster definition (#57800) * impl * fix --- src/Interpreters/ClusterProxy/executeQuery.cpp | 9 +++------ src/Processors/QueryPlan/ReadFromRemote.cpp | 8 ++++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 549eadcebd2..18f7280dd19 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -382,7 +382,6 @@ void executeQueryWithParallelReplicas( shard_num = column->getUInt(0); } - size_t all_replicas_count = 0; ClusterPtr new_cluster; /// if got valid shard_num from query initiator, 
then parallel replicas scope is the specified shard /// shards are numbered in order of appearance in the cluster config @@ -406,16 +405,14 @@ void executeQueryWithParallelReplicas( // shard_num is 1-based, but getClusterWithSingleShard expects 0-based index auto single_shard_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1); // convert cluster to representation expected by parallel replicas - new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings); + new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); } else { - new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings); + new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); } - all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); - - auto coordinator = std::make_shared(all_replicas_count); + auto coordinator = std::make_shared(new_cluster->getShardCount()); auto external_tables = new_context->getExternalTables(); auto read_from_remote = std::make_unique( query_ast, diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index f1dff279792..0d1fae0d239 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -367,7 +367,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder IConnections::ReplicaInfo replica_info { .all_replicas_count = all_replicas_count, - .number_of_current_replica = 0 + /// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`). + /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. + .number_of_current_replica = shard.shard_num - 1, }; addPipeForSingeReplica(pipes, shard.pool, replica_info); @@ -386,7 +388,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder IConnections::ReplicaInfo replica_info { .all_replicas_count = all_replicas_count, - .number_of_current_replica = pipes.size() + /// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`). + /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. 
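The hunks around this point only change how replicas are numbered; as background, a hedged sketch of how task-based parallel replicas are enabled for a query, using the settings named elsewhere in this series. The cluster and table names are assumptions, not taken from the patch.

```sql
-- Read one shard's data from up to three of its replicas in parallel.
SELECT count()
FROM big_events
SETTINGS
    allow_experimental_parallel_reading_from_replicas = 1,
    max_parallel_replicas = 3,
    cluster_for_parallel_replicas = 'default';
```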
+ .number_of_current_replica = current_shard->shard_num - 1, }; addPipeForSingeReplica(pipes, current_shard->pool, replica_info); From 9def89d416a94659b7b776e939329fbe71e7681b Mon Sep 17 00:00:00 2001 From: Christoph Wurm Date: Wed, 20 Dec 2023 18:26:36 +0000 Subject: [PATCH 136/137] Fix anchors to settings.md --- .../table-engines/integrations/embedded-rocksdb.md | 2 +- docs/en/engines/table-engines/integrations/hdfs.md | 2 +- docs/en/engines/table-engines/integrations/kafka.md | 4 ++-- docs/en/engines/table-engines/integrations/nats.md | 2 +- docs/en/engines/table-engines/integrations/rabbitmq.md | 2 +- docs/en/engines/table-engines/integrations/s3.md | 2 +- docs/en/engines/table-engines/special/distributed.md | 6 +++--- docs/en/engines/table-engines/special/file.md | 4 ++-- docs/en/engines/table-engines/special/filelog.md | 2 +- docs/en/interfaces/http.md | 10 +++++----- docs/en/operations/monitoring.md | 2 +- .../optimizing-performance/sampling-query-profiler.md | 2 +- docs/en/operations/query-cache.md | 4 ++-- .../server-configuration-parameters/settings.md | 6 +++--- docs/en/operations/settings/query-complexity.md | 2 +- docs/en/operations/settings/settings.md | 6 +++--- docs/en/operations/system-tables/clusters.md | 4 ++-- docs/en/operations/system-tables/query_log.md | 6 +++--- docs/en/operations/system-tables/query_thread_log.md | 2 +- docs/en/operations/system-tables/query_views_log.md | 2 +- docs/en/operations/system-tables/table_engines.md | 2 +- .../aggregate-functions/reference/count.md | 2 +- docs/en/sql-reference/functions/array-functions.md | 2 +- docs/en/sql-reference/functions/introspection.md | 2 +- docs/en/sql-reference/statements/select/join.md | 10 +++++----- docs/en/sql-reference/syntax.md | 2 +- docs/en/sql-reference/table-functions/cluster.md | 4 ++-- docs/en/sql-reference/table-functions/file.md | 4 ++-- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/en/sql-reference/table-functions/remote.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- 31 files changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 9af857b0835..44febe78c77 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -212,5 +212,5 @@ ORDER BY key ASC ``` ### More information on Joins -- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm) +- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#join_algorithm) - [JOIN clause](/docs/en/sql-reference/statements/select/join.md) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 19221c256f9..96e6bab6997 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -236,7 +236,7 @@ libhdfs3 support HDFS namenode HA. ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. 
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index de1a090d491..141d87fed20 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -54,7 +54,7 @@ Optional parameters: - `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. - `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. - `kafka_client_id` — Client identifier. Empty by default. @@ -151,7 +151,7 @@ Example: SELECT level, sum(total) FROM daily GROUP BY level; ``` -To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. +To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/settings/settings.md/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. To stop receiving topic data or to change the conversion logic, detach the materialized view: diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 37a41159fab..e898d1f1b82 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -58,7 +58,7 @@ Optional parameters: - `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`. - `nats_server_list` - Server list for connection. 
Can be specified to connect to NATS cluster. - `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). -- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `nats_username` - NATS username. - `nats_password` - NATS password. diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 53c6e089a70..0f3fef3d6fb 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -65,7 +65,7 @@ Optional parameters: - `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified. - `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`. - `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). Default: `0`. -- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `rabbitmq_max_block_size` - Number of row collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `rabbitmq_flush_interval_ms` - Timeout for flushing data from RabbitMQ. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue. - `rabbitmq_address` - Address for connection. Use ether this setting or `rabbitmq_host_port`. diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 3144bdd32fa..dfa06801d04 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -222,7 +222,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. 
+- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 6224c450ea2..de8ae0357dc 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -112,7 +112,7 @@ Specifying the `sharding_key` is necessary for the following: For **Insert limit settings** (`..._insert`) see also: - [distributed_foreground_insert](../../../operations/settings/settings.md#distributed_foreground_insert) setting -- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) setting +- [prefer_localhost_replica](../../../operations/settings/settings.md#prefer-localhost-replica) setting - `bytes_to_throw_insert` handled before `bytes_to_delay_insert`, so you should not set it to the value less then `bytes_to_delay_insert` ::: @@ -198,7 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `9440` and be configured with correct certificates. - `compression` - Use data compression. Default value: `true`. -When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. +When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. 
In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard. @@ -243,7 +243,7 @@ If the server ceased to exist or had a rough restart (for example, due to a hard When querying a `Distributed` table, `SELECT` queries are sent to all shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. -When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). +When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#max_parallel_replicas). To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation. diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 6e3897398a5..fdf5242ba3b 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -101,8 +101,8 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. +- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. 
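The `File` engine hunk above only retargets setting anchors; for orientation, here is a minimal, hypothetical sketch of how those settings are used (table name and values are illustrative, behaviour follows the setting descriptions quoted in the hunk):

``` sql
-- Hypothetical File-engine table; the backing data file may not exist yet.
CREATE TABLE file_demo (name String, value UInt32) ENGINE = File(TabSeparated);

-- With engine_file_empty_if_not_exists = 1, SELECT from a missing file returns
-- an empty result set instead of throwing an error.
SET engine_file_empty_if_not_exists = 1;
SELECT * FROM file_demo;

-- With engine_file_truncate_on_insert = 1, the next INSERT overwrites the file
-- instead of appending to it.
SET engine_file_truncate_on_insert = 1;
INSERT INTO file_demo VALUES ('a', 1), ('b', 2);
```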
diff --git a/docs/en/engines/table-engines/special/filelog.md b/docs/en/engines/table-engines/special/filelog.md index eef9a17444e..82201053bc5 100644 --- a/docs/en/engines/table-engines/special/filelog.md +++ b/docs/en/engines/table-engines/special/filelog.md @@ -41,7 +41,7 @@ Optional parameters: - `poll_timeout_ms` - Timeout for single poll from log file. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). - `poll_max_batch_size` — Maximum amount of records to be polled in a single poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). -- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `max_block_size` — The maximum batch size (in records) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `max_threads` - Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4). - `poll_directory_watch_events_backoff_init` - The initial sleep value for watch directory thread. Default: `500`. - `poll_directory_watch_events_backoff_max` - The max sleep value for watch directory thread. Default: `32000`. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 63f75fb7830..4eeb19cefcf 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -167,7 +167,7 @@ For successful requests that do not return a data table, an empty response body You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed. -You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting. +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#http_native_compression_disable_checksumming_on_decompress) setting. If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method. @@ -183,7 +183,7 @@ You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP - `snappy` To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. -In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. 
You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods. +In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#http_zlib_compression_level) setting for all compression methods. :::info Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. @@ -285,7 +285,7 @@ For information about other parameters, see the section “SET”. Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session. -You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence: +You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#send_progress_in_http_headers). Example of the header sequence: ``` text X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"} @@ -496,7 +496,7 @@ Next are the configuration methods for different `type`. `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. -The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. +The following example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. :::note To keep the default `handlers` such as` query`, `play`,` ping`, add the `` rule. @@ -539,7 +539,7 @@ In `dynamic_query_handler`, the query is written in the form of parameter of the ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the parameter is not passed in. -To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. 
+To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. Example: diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index adc384e21ae..de61da6f5c4 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -64,4 +64,4 @@ You can configure ClickHouse to export metrics to [Prometheus](https://prometheu Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. -To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. +To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 206f710734e..194d2714422 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -42,7 +42,7 @@ To analyze the `trace_log` system table: - Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 781d1f9bcd5..0b858038caf 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -103,7 +103,7 @@ It is also possible to limit the cache usage of individual users using [settings constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may allocate in the query cache and the maximum number of stored query results. 
For that, first provide configurations [query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and -[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings +[query_cache_max_entries](settings/settings.md#query-cache-max-entries) in a user profile in `users.xml`, then make both settings readonly: ``` xml @@ -144,7 +144,7 @@ value can be specified at session, profile or query level using setting [query_c Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). -ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#settings-max_block_size) rows. Due to filtering, aggregation, +ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks are squashed (if they are tiny) or split (if they are large) into blocks of 'max_block_size' size before insertion into the query result diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 01e30c84526..48434d992e2 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -2009,7 +2009,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to ## query_thread_log {#query_thread_log} -Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. +Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#log-query-threads) setting. Queries are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). @@ -2051,7 +2051,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the ## query_views_log {#query_views_log} -Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting. +Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#log-query-views) setting. Queries are logged in the [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). 
@@ -2331,7 +2331,7 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) - [Cluster Discovery](../../operations/cluster-discovery.md) - [Replicated database engine](../../engines/database-engines/replicated.md) diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 9e36aa26946..1cb7ec9dced 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -139,7 +139,7 @@ Limit on the number of bytes in the result. The same as the previous setting. What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw. -Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#settings-max_threads). +Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#max_threads). Example: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b9e7c37485f..6e087467bb9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1716,7 +1716,7 @@ Default value: `1` ## query_cache_squash_partial_results {#query-cache-squash-partial-results} -Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query_cache_compress_entries)). +Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query-cache-compress-entries)). Possible values: @@ -2486,7 +2486,7 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap](#distributed_replica_error_cap) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) +- [distributed_replica_error_half_life](#distributed_replica_error_half_life) ## distributed_background_insert_sleep_time_ms {#distributed_background_insert_sleep_time_ms} @@ -4715,7 +4715,7 @@ Possible values: Default value: `false`. 
-## rename_files_after_processing +## rename_files_after_processing {#rename_files_after_processing} - **Type:** String diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 2659f80e338..63cc083e4bc 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -78,5 +78,5 @@ is_active: NULL **See Also** - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) +- [distributed_replica_error_cap setting](../../operations/settings/settings.md#distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 4f5e214f1ce..7fcc4928355 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -11,7 +11,7 @@ This table does not contain the ingested data for `INSERT` queries. You can change settings of queries logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration. -You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#settings-log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. +You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#log-queries). We do not recommend to turn off logging because information in this table is important for solving issues. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. @@ -30,7 +30,7 @@ Each query creates one or two rows in the `query_log` table, depending on the st You can use the [log_queries_probability](../../operations/settings/settings.md#log-queries-probability) setting to reduce the number of queries, registered in the `query_log` table. -You can use the [log_formatted_queries](../../operations/settings/settings.md#settings-log-formatted-queries) setting to log formatted queries to the `formatted_query` column. +You can use the [log_formatted_queries](../../operations/settings/settings.md#log-formatted-queries) setting to log formatted queries to the `formatted_query` column. Columns: @@ -101,7 +101,7 @@ Columns: - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) - `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. 
To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. +- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#max_query_size). An empty string if it is not defined. - `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously. - `peak_threads_usage` ([UInt64)](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query. - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index a198d7c304f..0420a0392f2 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -8,7 +8,7 @@ Contains information about threads that execute queries, for example, thread nam To start logging: 1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. -2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1. +2. Set [log_query_threads](../../operations/settings/settings.md#log-query-threads) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 4dd8dd7420d..41a69da70aa 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -8,7 +8,7 @@ Contains information about the dependent views executed when running a query, fo To start logging: 1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. -2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. +2. Set [log_query_views](../../operations/settings/settings.md#log-query-views) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. 
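The `query_views_log` hunks above describe a two-step workflow (configure the server-side `query_views_log` section, then set `log_query_views = 1`). A hedged sketch of that workflow, assuming a materialized view is already attached to the table being written to; the column names are taken from the linked `system.query_views_log` description and may differ between versions:

``` sql
-- Enable logging of dependent views for the current session.
SET log_query_views = 1;

-- ... run an INSERT that is consumed by a materialized view here ...

-- Force the in-memory log buffer to be written, then inspect the log table.
SYSTEM FLUSH LOGS;
SELECT view_name, view_type, status
FROM system.query_views_log
ORDER BY event_time DESC
LIMIT 5;
```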
diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 08594739ecf..56668abae31 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -14,7 +14,7 @@ This table contains the following columns (the column type is shown in brackets) - `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. - `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). - `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. -- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#max-insert-threads) setting). Example: diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index a40108a331a..ca4067c8d8c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -28,7 +28,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/ **Details** -ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. +ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function. The `SELECT count() FROM table` query is optimized by default using metadata from MergeTree. If you need to use row-level security, disable optimization using the [optimize_trivial_count_query](../../../operations/settings/settings.md#optimize-trivial-count-query) setting. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 00efa63c960..f5da00a8663 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -143,7 +143,7 @@ range([start, ] end [, step]) **Implementation details** - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. 
-- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#function_range_max_elements_in_block) setting. - Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 8cb35483555..1025b8bdc3d 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -16,7 +16,7 @@ For proper operation of introspection functions: - Install the `clickhouse-common-static-dbg` package. -- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#allow_introspection_functions) setting to 1. For security reasons introspection functions are disabled by default. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 281a1d0436c..5bec89f382e 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -45,20 +45,20 @@ Additional join types available in ClickHouse: - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. :::note -When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). +When [join_algorithm](../../../operations/settings/settings.md#join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). ::: ## Settings -The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. +The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#join_default_strictness) setting. The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. 
**See also** -- [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) -- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) +- [join_algorithm](../../../operations/settings/settings.md#join_algorithm) +- [join_any_take_last_row](../../../operations/settings/settings.md#join_any_take_last_row) - [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) - [partial_merge_join_optimizations](../../../operations/settings/settings.md#partial_merge_join_optimizations) - [partial_merge_join_rows_in_right_blocks](../../../operations/settings/settings.md#partial_merge_join_rows_in_right_blocks) @@ -352,7 +352,7 @@ If you need a `JOIN` for joining with dimension tables (these are relatively sma ### Memory Limitations -By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) setting. +By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#join_algorithm) setting. If you need to restrict `JOIN` operation memory consumption use the following settings: diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index f5651c2dcb6..6dcb3e75e48 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -16,7 +16,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). -Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. +Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. When using the `Values` format in an `INSERT` query, it may seem that data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited. 
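The `syntax.md` hunk above explains the split between the fast stream parser and the full parser for `INSERT ... VALUES`. A small sketch of the behaviour it describes, reusing the `t` table from that page (the `lower(...)` call is an illustrative stand-in for any SQL expression inside `VALUES`):

``` sql
-- With input_format_values_interpret_expressions = 1, a value the fast stream
-- parser cannot handle is re-parsed by the full parser as an SQL expression.
SET input_format_values_interpret_expressions = 1;
INSERT INTO t VALUES (1, lower('Hello, world')), (2, 'abc'), (3, 'def');
```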
diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index a083c6b89a6..ad92ab39183 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -55,5 +55,5 @@ Connection settings like `host`, `port`, `user`, `password`, `compression`, `sec **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) -- [load_balancing](../../operations/settings/settings.md#settings-load_balancing) +- [skip_unavailable_shards](../../operations/settings/settings.md#skip_unavailable_shards) +- [load_balancing](../../operations/settings/settings.md#load_balancing) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f0de4a405a0..3a63811add6 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -199,11 +199,11 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 ## Settings {#settings} -- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. +- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. - [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. +- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. **See Also** diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 463632f4e07..92f904b8841 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -100,7 +100,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Storage Settings {#storage-settings} -- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. 
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 3ca177050d3..228f4a4c7e1 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -165,5 +165,5 @@ The following pattern types are supported. - `{0n..0m}` - A range of numbers with leading zeroes. This pattern preserves leading zeroes in indices. For instance, `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`. - `{a|b}` - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`. -The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. +The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#load_balancing) setting. The number of generated addresses is limited by [table_function_remote_max_addresses](../../operations/settings/settings.md#table_function_remote_max_addresses) setting. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index dc11259c626..bcad284016d 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -236,7 +236,7 @@ LIMIT 5; ## Storage Settings {#storage-settings} -- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. +- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. From e0a790de1e9839168dbd281051092be3c4e9d897 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 20 Dec 2023 21:28:54 +0100 Subject: [PATCH 137/137] Fix for nightly job for digest-ci (#58079) * Fix for run_always job - do not set done #no-merge-commit --- tests/ci/ci.py | 39 +++++++++++--------- tests/ci/pr_info.py | 89 ++++++++------------------------------------- 2 files changed, 36 insertions(+), 92 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 01317b49e1a..4378e4361f0 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -279,11 +279,11 @@ def _configure_docker_jobs( images_info = docker_images_helper.get_images_info() # a. 
check missing images - print("Start checking missing images in dockerhub") - # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] - # find if it's possible to use the setting of /etc/docker/daemon.json - docker_images_helper.docker_login() if not rebuild_all_dockers: + # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] + # find if it's possible to use the setting of /etc/docker/daemon.json + docker_images_helper.docker_login() + print("Start checking missing images in dockerhub") missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict) missing_multi = list(missing_multi_dict) missing_amd64 = [] @@ -305,6 +305,15 @@ def _configure_docker_jobs( "aarch64", ) ) + # FIXME: temporary hack, remove after transition to docker digest as tag + else: + if missing_multi: + print( + f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag" + ) + for image in missing_multi: + imagename_digest_dict[image] = "latest" + print("...checking missing images in dockerhub - done") else: # add all images to missing missing_multi = list(imagename_digest_dict) @@ -315,16 +324,7 @@ def _configure_docker_jobs( for name in imagename_digest_dict if not images_info[name]["only_amd64"] ] - # FIXME: temporary hack, remove after transition to docker digest as tag - if docker_digest_or_latest: - if missing_multi: - print( - f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag" - ) - for image in missing_multi: - imagename_digest_dict[image] = "latest" - print("...checking missing images in dockerhub - done") return { "images": imagename_digest_dict, "missing_aarch64": missing_aarch64, @@ -548,14 +548,14 @@ def main() -> int: if args.configure: GR = GitRunner() - pr_info = PRInfo(need_changed_files=True) + pr_info = PRInfo() docker_data = {} git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") # if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit tokens = [] - if pr_info.number != 0: + if pr_info.number != 0 and not args.skip_jobs: message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1") tokens = _fetch_commit_tokens(message) print(f"Found commit message tokens: [{tokens}]") @@ -689,7 +689,8 @@ def main() -> int: elif args.mark_success: assert indata, "Run config must be provided via --infile" job = args.job_name - num_batches = CI_CONFIG.get_job_config(job).num_batches + job_config = CI_CONFIG.get_job_config(job) + num_batches = job_config.num_batches assert ( num_batches <= 1 or 0 <= args.batch < num_batches ), f"--batch must be provided and in range [0, {num_batches}) for {job}" @@ -706,7 +707,7 @@ def main() -> int: if not CommitStatusData.is_present(): # apparently exit after rerun-helper check # do nothing, exit without failure - print("ERROR: no status file for job [{job}]") + print(f"ERROR: no status file for job [{job}]") job_status = CommitStatusData( status="dummy failure", description="dummy status", @@ -717,7 +718,9 @@ def main() -> int: job_status = CommitStatusData.load_status() # Storing job data (report_url) to restore OK GH status on job results reuse - if job_status.is_ok(): + if job_config.run_always: + print(f"Job [{job}] runs always in CI - do not mark as done") + elif job_status.is_ok(): success_flag_name = get_file_flag_name( job, 
indata["jobs_data"]["digests"][job], args.batch, num_batches ) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index c023ca048d6..15558c81c7e 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,7 +2,7 @@ import json import logging import os -from typing import Dict, List, Set, Union, Literal +from typing import Dict, List, Set, Union from unidiff import PatchSet # type: ignore @@ -93,6 +93,7 @@ class PRInfo: github_event = PRInfo.default_event.copy() self.event = github_event self.changed_files = set() # type: Set[str] + self.changed_files_requested = False self.body = "" self.diff_urls = [] # type: List[str] # release_pr and merged_pr are used for docker images additional cache @@ -285,6 +286,7 @@ class PRInfo: response.raise_for_status() diff_object = PatchSet(response.text) self.changed_files.update({f.path for f in diff_object}) + self.changed_files_requested = True print(f"Fetched info about {len(self.changed_files)} changed files") def get_dict(self): @@ -297,9 +299,10 @@ class PRInfo: } def has_changes_in_documentation(self) -> bool: - # If the list wasn't built yet the best we can do is to - # assume that there were changes. - if self.changed_files is None or not self.changed_files: + if not self.changed_files_requested: + self.fetch_changed_files() + + if not self.changed_files: return True for f in self.changed_files: @@ -316,7 +319,11 @@ class PRInfo: checks if changes are docs related without other changes FIXME: avoid hardcoding filenames here """ + if not self.changed_files_requested: + self.fetch_changed_files() + if not self.changed_files: + # if no changes at all return False return False for f in self.changed_files: @@ -332,7 +339,10 @@ class PRInfo: return True def has_changes_in_submodules(self): - if self.changed_files is None or not self.changed_files: + if not self.changed_files_requested: + self.fetch_changed_files() + + if not self.changed_files: return True for f in self.changed_files: @@ -340,75 +350,6 @@ class PRInfo: return True return False - def can_skip_builds_and_use_version_from_master(self): - if FORCE_TESTS_LABEL in self.labels: - return False - - if self.changed_files is None or not self.changed_files: - return False - - return not any( - f.startswith("programs") - or f.startswith("src") - or f.startswith("base") - or f.startswith("cmake") - or f.startswith("rust") - or f == "CMakeLists.txt" - or f == "tests/ci/build_check.py" - for f in self.changed_files - ) - - def can_skip_integration_tests(self, versions: List[str]) -> bool: - if FORCE_TESTS_LABEL in self.labels: - return False - - # If docker image(s) relevant to integration tests are updated - if any(self.sha in version for version in versions): - return False - - if self.changed_files is None or not self.changed_files: - return False - - if not self.can_skip_builds_and_use_version_from_master(): - return False - - # Integration tests can be skipped if integration tests are not changed - return not any( - f.startswith("tests/integration/") - or f == "tests/ci/integration_test_check.py" - for f in self.changed_files - ) - - def can_skip_functional_tests( - self, version: str, test_type: Literal["stateless", "stateful"] - ) -> bool: - if FORCE_TESTS_LABEL in self.labels: - return False - - # If docker image(s) relevant to functional tests are updated - if self.sha in version: - return False - - if self.changed_files is None or not self.changed_files: - return False - - if not self.can_skip_builds_and_use_version_from_master(): - return False - - # Functional tests can be skipped 
if queries tests are not changed - if test_type == "stateless": - return not any( - f.startswith("tests/queries/0_stateless") - or f == "tests/ci/functional_test_check.py" - for f in self.changed_files - ) - else: # stateful - return not any( - f.startswith("tests/queries/1_stateful") - or f == "tests/ci/functional_test_check.py" - for f in self.changed_files - ) - class FakePRInfo: def __init__(self):