From 29da9f96458e421796e4d319f730a9fbb485f886 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Thu, 2 Nov 2023 04:31:01 +0000
Subject: [PATCH 01/80] Fix ClickHouse-sourced dictionaries with explicit query

---
 src/Dictionaries/ExternalQueryBuilder.cpp      |  8 +++++---
 .../02907_clickhouse_dictionary_bug.reference  |  1 +
 .../02907_clickhouse_dictionary_bug.sh         | 20 +++++++++++++++++++
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/02907_clickhouse_dictionary_bug.reference
 create mode 100755 tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh

diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp
index e21b0842e11..792c4e3e907 100644
--- a/src/Dictionaries/ExternalQueryBuilder.cpp
+++ b/src/Dictionaries/ExternalQueryBuilder.cpp
@@ -396,18 +396,20 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery(
     }
     else
     {
-        writeString(query, out);
-
         auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE);
         if (condition_position == std::string::npos)
         {
-            writeString(" WHERE ", out);
+            writeString("SELECT * FROM (", out);
+            writeString(query, out);
+            writeString(") WHERE ", out);
             composeKeysCondition(key_columns, requested_rows, method, partition_key_prefix, out);
             writeString(";", out);
 
             return out.str();
         }
 
+        writeString(query, out);
+
         WriteBufferFromOwnString condition_value_buffer;
         composeKeysCondition(key_columns, requested_rows, method, partition_key_prefix, condition_value_buffer);
         const auto & condition_value = condition_value_buffer.str();
diff --git a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.reference b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.reference
new file mode 100644
index 00000000000..61780798228
--- /dev/null
+++ b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.reference
@@ -0,0 +1 @@
+b
diff --git a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh
new file mode 100755
index 00000000000..57182050534
--- /dev/null
+++ b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Tags: zookeeper
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n -q " + DROP DICTIONARY IF EXISTS 02907_dictionary; + DROP TABLE IF EXISTS 02907_table; + + CREATE TABLE 02907_table (A String, B String) ENGINE=Memory AS SELECT 'a', 'b'; + CREATE DICTIONARY 02907_dictionary(A String, B String) PRIMARY KEY A + SOURCE(CLICKHOUSE(QUERY \$\$ SELECT A, B FROM ${CLICKHOUSE_DATABASE}.02907_table ORDER BY A DESC LIMIT 1 BY A \$\$)) + LAYOUT(complex_key_direct()); + + SELECT dictGet('02907_dictionary','B','a'); + + DROP DICTIONARY 02907_dictionary; + DROP TABLE 02907_table;" From 782d67766d732f497889be7c08829814348938ae Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Wed, 1 Nov 2023 21:56:20 -0700 Subject: [PATCH 02/80] [Docs] Specify IAM role necessary for GCS table function --- docs/en/sql-reference/table-functions/gcs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 48c2381696e..c49ae6a8501 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -7,7 +7,7 @@ keywords: [gcs, bucket] # gcs Table Function -Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/). +Provides a table-like interface to `SELECT` and `INSERT` data from [Google Cloud Storage](https://cloud.google.com/storage/). Requires the [`Storage Object User` IAM role](https://cloud.google.com/storage/docs/access-control/iam-roles). **Syntax** From 9021667f82eace29da4cb60d59070b3a3bb7a0cd Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 2 Nov 2023 08:44:03 +0000 Subject: [PATCH 03/80] Fix link to failed check report in status commit --- tests/ci/commit_status_helper.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index aeeb8531aac..09e3478b3fc 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -242,12 +242,12 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: for desc, gs in grouped_statuses.items(): state = get_worst_state(gs) state_text = f"{STATUS_ICON_MAP[state]} {state}" - # take the first target_url - target_url = next( - (status.target_url for status in gs if status.target_url), None - ) - if target_url: - state_text = f'{state_text}' + # take the first target_url with the worst state + for status in gs: + if status.target_url and status.state == state: + state_text = f'{state_text}' + break + table_row = ( f"{desc.name}{desc.description}" f"{state_text}\n" From a5c98638fed642d0c6b00508b8cb9c9d31ddbd32 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 29 Sep 2023 09:48:36 +0000 Subject: [PATCH 04/80] Add function jaccardIndex back with better perf --- src/Functions/FunctionsStringDistance.cpp | 44 +++++++++++++++++-- .../02884_string_distance_function.reference | 7 +++ .../02884_string_distance_function.sql | 9 ++-- 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 98a04170dd4..ae774986767 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -59,8 +59,8 @@ struct FunctionStringDistanceImpl size_t size = res.size(); for (size_t i = 0; i < size; ++i) { - res[i] - = Op::process(haystack_data, haystack_size, needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1); 
+ res[i] = Op::process(haystack_data, haystack_size, + needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1); } } @@ -108,6 +108,39 @@ struct ByteHammingDistanceImpl } }; +struct ByteJaccardIndexImpl +{ + using ResultType = Float64; + static ResultType inline process( + const char * __restrict haystack, size_t haystack_size, const char * __restrict needle, size_t needle_size) + { + if (haystack_size == 0 || needle_size == 0) + return 0; + + constexpr size_t max_size = std::numeric_limits::max() + 1; + std::array haystack_set; + std::array needle_set; + + haystack_set.fill(0); + needle_set.fill(0); + + for (size_t i = 0; i < haystack_size; ++i) + haystack_set[static_cast(haystack[i])] = 1; + for (size_t i = 0; i < needle_size; ++i) + needle_set[static_cast(needle[i])] = 1; + + UInt8 intersection = 0; + UInt8 union_size = 0; + for (size_t i = 0; i < max_size; ++i) + { + intersection += haystack_set[i] & needle_set[i]; + union_size += haystack_set[i] | needle_set[i]; + } + + return static_cast(intersection) / static_cast(union_size); + } +}; + struct ByteEditDistanceImpl { using ResultType = UInt64; @@ -173,6 +206,9 @@ using FunctionByteHammingDistance = FunctionsStringSimilarity, NameEditDistance>; +struct NameJaccardIndex { static constexpr auto name = "jaccardIndex"; }; +using FunctionByteJaccardIndex = FunctionsStringSimilarity, NameJaccardIndex>; + REGISTER_FUNCTION(StringDistance) { factory.registerFunction( @@ -181,7 +217,9 @@ REGISTER_FUNCTION(StringDistance) factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); - factory.registerAlias("levenshteinDistance", NameEditDistance::name); + + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"}); } } diff --git a/tests/queries/0_stateless/02884_string_distance_function.reference b/tests/queries/0_stateless/02884_string_distance_function.reference index cedc23cc84d..4f37f0824cc 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.reference +++ b/tests/queries/0_stateless/02884_string_distance_function.reference @@ -2,6 +2,8 @@ const arguments byteHammingDistance 0 const arguments editDistance 6 +const arguments jaccardIndex +0.4 byteHammingDistance 1 7 @@ -30,6 +32,11 @@ mismatches(alias) 6 3 10 +jaccardIndex +0.8571428571428571 +0.8571428571428571 +0 +0.4 editDistance 1 1 diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index 1ddb9bfbafd..3ef0307850f 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -2,8 +2,9 @@ select 'const arguments byteHammingDistance'; select byteHammingDistance('abcd', 'abcd'); select 'const arguments editDistance'; select editDistance('clickhouse', 'mouse'); -/*select 'const arguments jaccardIndex'; -select jaccardIndex('clickhouse', 'mouse');*/ + +select 'const arguments jaccardIndex'; +select jaccardIndex('clickhouse', 'mouse'); drop table if exists t; create table t @@ -25,8 +26,8 @@ select mismatches(s1, s2) from t; select mismatches('abc', s2) from t; select mismatches(s2, 'def') from t; -/*select 'byteJaccardIndex'; -select byteJaccardIndex(s1, s2) from t;*/ +select 'jaccardIndex'; +select jaccardIndex(s1, s2) from t; select 'editDistance'; select editDistance(s1, s2) from t; select 
'levenshteinDistance'; From 85249fb7a9b1ee6510a0262ada529b49eff6f9ea Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 2 Oct 2023 11:45:02 +0000 Subject: [PATCH 05/80] add order by to 02884_string_distance_function --- .../02884_string_distance_function.reference | 30 +++++++++---------- .../02884_string_distance_function.sql | 18 +++++------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/02884_string_distance_function.reference b/tests/queries/0_stateless/02884_string_distance_function.reference index 4f37f0824cc..26aefe4d759 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.reference +++ b/tests/queries/0_stateless/02884_string_distance_function.reference @@ -5,45 +5,45 @@ const arguments editDistance const arguments jaccardIndex 0.4 byteHammingDistance -1 7 +1 7 10 byteHammingDistance(const, non const) 3 -6 3 +6 10 byteHammingDistance(non const, const) -6 -6 3 +6 +6 10 mismatches(alias) +7 1 7 -7 +10 +3 +3 +6 10 3 6 -3 -10 6 -6 -3 10 jaccardIndex -0.8571428571428571 -0.8571428571428571 0 +0.8571428571428571 +0.8571428571428571 0.4 editDistance -1 -1 7 +1 +1 6 levenshteinDistance -1 -1 7 +1 +1 6 diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index 3ef0307850f..ccb2a3956d2 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -15,23 +15,23 @@ create table t insert into t values ('abcdefg', 'abcdef') ('abcdefg', 'bcdefg') ('abcdefg', '') ('mouse', 'clickhouse'); select 'byteHammingDistance'; -select byteHammingDistance(s1, s2) from t; +select byteHammingDistance(s1, s2) FROM t ORDER BY s1, s2; select 'byteHammingDistance(const, non const)'; -select byteHammingDistance('abc', s2) from t; +select byteHammingDistance('abc', s2) FROM t ORDER BY s1, s2; select 'byteHammingDistance(non const, const)'; -select byteHammingDistance(s2, 'def') from t; +select byteHammingDistance(s2, 'def') FROM t ORDER BY s1, s2; select 'mismatches(alias)'; -select mismatches(s1, s2) from t; -select mismatches('abc', s2) from t; -select mismatches(s2, 'def') from t; +select mismatches(s1, s2) FROM t ORDER BY s1, s2; +select mismatches('abc', s2) FROM t ORDER BY s1, s2; +select mismatches(s2, 'def') FROM t ORDER BY s1, s2; select 'jaccardIndex'; -select jaccardIndex(s1, s2) from t; +select jaccardIndex(s1, s2) FROM t ORDER BY s1, s2; select 'editDistance'; -select editDistance(s1, s2) from t; +select editDistance(s1, s2) FROM t ORDER BY s1, s2; select 'levenshteinDistance'; -select levenshteinDistance(s1, s2) from t; +select levenshteinDistance(s1, s2) FROM t ORDER BY s1, s2; SELECT editDistance(randomString(power(2, 17)), 'abc'); -- { serverError TOO_LARGE_STRING_SIZE} From 1f936d1966552d5b957d0af998b1e81b3f3413f3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 2 Oct 2023 11:49:52 +0000 Subject: [PATCH 06/80] upd byteHammingDistance doc --- .../sql-reference/functions/string-search-functions.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 264708513fa..94a1a1bf19f 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -689,21 +689,21 @@ Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance **Syntax** ```sql 
-byteHammingDistance(string2, string2) +byteHammingDistance(string1, string2) ``` **Examples** ``` sql -SELECT byteHammingDistance('abc', 'ab') ; +SELECT byteHammingDistance('karolin', 'kathrin'); ``` Result: ``` text -┌─byteHammingDistance('abc', 'ab')─┐ -│ 1 │ -└──────────────────────────────────┘ +┌─byteHammingDistance('karolin', 'kathrin')─┐ +│ 3 │ +└───────────────────────────────────────────┘ ``` - Alias: mismatches From 14ba4696f933ca8220cac7948684fb5be48d3e0c Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 2 Oct 2023 11:57:59 +0000 Subject: [PATCH 07/80] Rename jaccardIndex -> stringJaccardIndex, upd doc --- .../functions/string-search-functions.md | 20 ++++++++--------- src/Functions/FunctionsStringDistance.cpp | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 94a1a1bf19f..d37f417c7b5 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -706,30 +706,30 @@ Result: └───────────────────────────────────────────┘ ``` -- Alias: mismatches +Alias: mismatches -## jaccardIndex +## stringJaccardIndex Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings. **Syntax** ```sql -byteJaccardIndex(string1, string2) +stringJaccardIndex(string1, string2) ``` **Examples** ``` sql -SELECT jaccardIndex('clickhouse', 'mouse'); +SELECT stringJaccardIndex('clickhouse', 'mouse'); ``` Result: ``` text -┌─jaccardIndex('clickhouse', 'mouse')─┐ -│ 0.4 │ -└─────────────────────────────────────────┘ +┌─stringJaccardIndex('clickhouse', 'mouse')─┐ +│ 0.4 │ +└───────────────────────────────────────────┘ ``` ## editDistance @@ -752,8 +752,8 @@ Result: ``` text ┌─editDistance('clickhouse', 'mouse')─┐ -│ 6 │ -└─────────────────────────────────────────┘ +│ 6 │ +└─────────────────────────────────────┘ ``` -- Alias: levenshteinDistance +Alias: levenshteinDistance diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index ae774986767..18e297fa4f6 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -156,9 +156,8 @@ struct ByteEditDistanceImpl if (haystack_size > max_string_size || needle_size > max_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, - "The string size is too big for function byteEditDistance. 
" - "Should be at most {}", - max_string_size); + "The string size is too big for function editDistance, " + "should be at most {}", max_string_size); PaddedPODArray distances0(haystack_size + 1, 0); PaddedPODArray distances1(haystack_size + 1, 0); @@ -196,18 +195,19 @@ struct NameByteHammingDistance { static constexpr auto name = "byteHammingDistance"; }; +using FunctionByteHammingDistance = FunctionsStringSimilarity, NameByteHammingDistance>; struct NameEditDistance { static constexpr auto name = "editDistance"; }; +using FunctionEditDistance = FunctionsStringSimilarity, NameEditDistance>; -using FunctionByteHammingDistance = FunctionsStringSimilarity, NameByteHammingDistance>; - -using FunctionByteEditDistance = FunctionsStringSimilarity, NameEditDistance>; - -struct NameJaccardIndex { static constexpr auto name = "jaccardIndex"; }; -using FunctionByteJaccardIndex = FunctionsStringSimilarity, NameJaccardIndex>; +struct NameJaccardIndex +{ + static constexpr auto name = "stringJaccardIndex"; +}; +using FunctionStringJaccardIndex = FunctionsStringSimilarity, NameJaccardIndex>; REGISTER_FUNCTION(StringDistance) { @@ -215,11 +215,11 @@ REGISTER_FUNCTION(StringDistance) FunctionDocumentation{.description = R"(Calculates Hamming distance between two byte-strings.)"}); factory.registerAlias("mismatches", NameByteHammingDistance::name); - factory.registerFunction( + factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); factory.registerAlias("levenshteinDistance", NameEditDistance::name); - factory.registerFunction( + factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"}); } } From f7ac4367d71d7ea7b7ef6ec36bd6bc32eaec8ea3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 2 Oct 2023 12:43:52 +0000 Subject: [PATCH 08/80] Add stringJaccardIndex to aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 63775d22b64..2e6b68da15f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2280,6 +2280,7 @@ stochasticlinearregression stochasticlogisticregression storages storig +stringJaccardIndex stringToH stripelog strtod From cf86f753f83d425cf8bff6de0ce3d3ba35400cd3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 5 Oct 2023 10:00:34 +0000 Subject: [PATCH 09/80] fix 02884_string_distance_function --- .../0_stateless/02884_string_distance_function.reference | 4 ++-- .../0_stateless/02884_string_distance_function.sql | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02884_string_distance_function.reference b/tests/queries/0_stateless/02884_string_distance_function.reference index 26aefe4d759..46680702424 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.reference +++ b/tests/queries/0_stateless/02884_string_distance_function.reference @@ -2,7 +2,7 @@ const arguments byteHammingDistance 0 const arguments editDistance 6 -const arguments jaccardIndex +const arguments stringJaccardIndex 0.4 byteHammingDistance 7 @@ -32,7 +32,7 @@ mismatches(alias) 6 6 10 -jaccardIndex +stringJaccardIndex 0 0.8571428571428571 0.8571428571428571 diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql 
b/tests/queries/0_stateless/02884_string_distance_function.sql index ccb2a3956d2..829b2806382 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -3,8 +3,8 @@ select byteHammingDistance('abcd', 'abcd'); select 'const arguments editDistance'; select editDistance('clickhouse', 'mouse'); -select 'const arguments jaccardIndex'; -select jaccardIndex('clickhouse', 'mouse'); +select 'const arguments stringJaccardIndex'; +select stringJaccardIndex('clickhouse', 'mouse'); drop table if exists t; create table t @@ -26,8 +26,8 @@ select mismatches(s1, s2) FROM t ORDER BY s1, s2; select mismatches('abc', s2) FROM t ORDER BY s1, s2; select mismatches(s2, 'def') FROM t ORDER BY s1, s2; -select 'jaccardIndex'; -select jaccardIndex(s1, s2) FROM t ORDER BY s1, s2; +select 'stringJaccardIndex'; +select stringJaccardIndex(s1, s2) FROM t ORDER BY s1, s2; select 'editDistance'; select editDistance(s1, s2) FROM t ORDER BY s1, s2; select 'levenshteinDistance'; From bda15d174a0f6504a08dbc68709f24d5c55482c4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 5 Oct 2023 11:18:44 +0000 Subject: [PATCH 10/80] Add stringJaccardIndexUTF8 --- src/Functions/FunctionsStringDistance.cpp | 98 ++++++++++++++++++- .../02884_string_distance_function.reference | 10 ++ .../02884_string_distance_function.sql | 17 ++++ 3 files changed, 120 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 18e297fa4f6..3098d02630a 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef __SSE4_2__ # include @@ -14,6 +15,7 @@ namespace DB { namespace ErrorCodes { +extern const int BAD_ARGUMENTS; extern const int TOO_LARGE_STRING_SIZE; } @@ -108,6 +110,7 @@ struct ByteHammingDistanceImpl } }; +template struct ByteJaccardIndexImpl { using ResultType = Float64; @@ -117,20 +120,97 @@ struct ByteJaccardIndexImpl if (haystack_size == 0 || needle_size == 0) return 0; + const char * haystack_end = haystack + haystack_size; + const char * needle_end = needle + needle_size; + + /// For byte strings use plain array as a set constexpr size_t max_size = std::numeric_limits::max() + 1; std::array haystack_set; std::array needle_set; + /// For UTF-8 strings we also use sets of code points greater than max_size + std::set haystack_utf8_set; + std::set needle_utf8_set; + haystack_set.fill(0); needle_set.fill(0); - for (size_t i = 0; i < haystack_size; ++i) - haystack_set[static_cast(haystack[i])] = 1; - for (size_t i = 0; i < needle_size; ++i) - needle_set[static_cast(needle[i])] = 1; + while (haystack < haystack_end) + { + size_t len = 1; + if constexpr (is_utf8) + len = UTF8::seqLength(*haystack); + + if (len == 1) + { + haystack_set[static_cast(*haystack)] = 1; + ++haystack; + } + else + { + auto code_point = UTF8::convertUTF8ToCodePoint(haystack, haystack_end - haystack); + if (code_point.has_value()) + { + haystack_utf8_set.insert(code_point.value()); + haystack += len; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(haystack, haystack_end - haystack)); + } + } + } + + while (needle < needle_end) + { + + size_t len = 1; + if constexpr (is_utf8) + len = UTF8::seqLength(*needle); + + if (len == 1) + { + needle_set[static_cast(*needle)] = 1; + ++needle; + } + else + { + auto code_point = 
UTF8::convertUTF8ToCodePoint(needle, needle_end - needle); + if (code_point.has_value()) + { + needle_utf8_set.insert(code_point.value()); + needle += len; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(needle, needle_end - needle)); + } + } + } UInt8 intersection = 0; UInt8 union_size = 0; + + if constexpr (is_utf8) + { + auto lit = haystack_utf8_set.begin(); + auto rit = needle_utf8_set.begin(); + while (lit != haystack_utf8_set.end() && rit != needle_utf8_set.end()) + { + if (*lit == *rit) + { + ++intersection; + ++lit; + ++rit; + } + else if (*lit < *rit) + ++lit; + else + ++rit; + } + union_size = haystack_utf8_set.size() + needle_utf8_set.size() - intersection; + } + for (size_t i = 0; i < max_size; ++i) { intersection += haystack_set[i] & needle_set[i]; @@ -207,7 +287,13 @@ struct NameJaccardIndex { static constexpr auto name = "stringJaccardIndex"; }; -using FunctionStringJaccardIndex = FunctionsStringSimilarity, NameJaccardIndex>; +using FunctionStringJaccardIndex = FunctionsStringSimilarity>, NameJaccardIndex>; + +struct NameJaccardIndexUTF8 +{ + static constexpr auto name = "stringJaccardIndexUTF8"; +}; +using FunctionStringJaccardIndexUTF8 = FunctionsStringSimilarity>, NameJaccardIndexUTF8>; REGISTER_FUNCTION(StringDistance) { @@ -221,5 +307,7 @@ REGISTER_FUNCTION(StringDistance) factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"}); + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two UTF8 strings.)"}); } } diff --git a/tests/queries/0_stateless/02884_string_distance_function.reference b/tests/queries/0_stateless/02884_string_distance_function.reference index 46680702424..3ac30825fd0 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.reference +++ b/tests/queries/0_stateless/02884_string_distance_function.reference @@ -37,6 +37,16 @@ stringJaccardIndex 0.8571428571428571 0.8571428571428571 0.4 +0 +0.8571428571428571 +0.8571428571428571 +0.4 +0.4 +0 +0 +0 +0 +0.25 0.625 editDistance 7 1 diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index 829b2806382..8126cfb5bd9 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -28,6 +28,23 @@ select mismatches(s2, 'def') FROM t ORDER BY s1, s2; select 'stringJaccardIndex'; select stringJaccardIndex(s1, s2) FROM t ORDER BY s1, s2; +select stringJaccardIndexUTF8(s1, s2) FROM t ORDER BY s1, s2; + +-- we do not perform full UTF8 validation, so sometimes it just returns some result +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x48\x65\x6C')); +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF\xFF\xFF\xFF')); +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x41\xE2\x82\xAC')); +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x9F\x99\x82')); +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF')); +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError 36 } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError 36 } +SELECT 
stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError 36 } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError 36 } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError 36 } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError 36 } + +SELECT stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'), stringJaccardIndex('😃🌍', '🙃😃🌑'); + select 'editDistance'; select editDistance(s1, s2) FROM t ORDER BY s1, s2; select 'levenshteinDistance'; From 90b64bcdb9160d6bb293e538d21a223fb9c61c83 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 12 Oct 2023 17:45:48 +0200 Subject: [PATCH 11/80] backward compatibility and implementation feature storage_metadata_write_full_object_key --- docs/en/operations/settings/settings.md | 7 + src/Common/ObjectStorageKey.cpp | 68 ++++ src/Common/ObjectStorageKey.h | 29 ++ src/Core/Settings.h | 3 +- src/Disks/IDisk.h | 8 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 17 +- .../AzureBlobStorage/AzureObjectStorage.h | 2 +- .../registerDiskAzureBlobStorage.cpp | 5 +- .../Cached/CachedObjectStorage.cpp | 4 +- .../Cached/CachedObjectStorage.h | 2 +- .../ObjectStorages/DiskObjectStorage.cpp | 14 +- src/Disks/ObjectStorages/DiskObjectStorage.h | 4 +- .../DiskObjectStorageMetadata.cpp | 150 ++++++--- .../DiskObjectStorageMetadata.h | 44 ++- ...jectStorageRemoteMetadataRestoreHelper.cpp | 27 +- .../DiskObjectStorageTransaction.cpp | 60 ++-- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 5 +- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 +- src/Disks/ObjectStorages/IMetadataStorage.h | 6 +- src/Disks/ObjectStorages/IObjectStorage.cpp | 19 +- src/Disks/ObjectStorages/IObjectStorage.h | 27 +- .../Local/LocalObjectStorage.cpp | 9 +- .../ObjectStorages/Local/LocalObjectStorage.h | 5 +- .../Local/registerLocalObjectStorage.cpp | 14 +- .../MetadataStorageFromDisk.cpp | 43 +-- .../ObjectStorages/MetadataStorageFromDisk.h | 11 +- ...taStorageFromDiskTransactionOperations.cpp | 4 +- ...dataStorageFromDiskTransactionOperations.h | 9 +- .../MetadataStorageFromPlainObjectStorage.cpp | 49 ++- .../MetadataStorageFromPlainObjectStorage.h | 15 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 36 ++- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 16 +- .../ObjectStorages/S3/registerDiskS3.cpp | 7 +- src/Disks/ObjectStorages/StoredObject.h | 35 ++- ...etadataStorageFromStaticFilesWebServer.cpp | 5 +- .../MetadataStorageFromStaticFilesWebServer.h | 7 +- .../ObjectStorages/Web/WebObjectStorage.h | 5 +- src/Storages/StorageAzureBlob.cpp | 15 +- .../System/StorageSystemRemoteDataPaths.cpp | 6 +- .../test_remote_blobs_naming/__init__.py | 0 .../configs/settings.xml | 10 + .../configs/settings_new.xml | 11 + .../configs/storage_conf.xml | 47 +++ .../test_backward_compatibility.py | 296 ++++++++++++++++++ 44 files changed, 871 insertions(+), 287 deletions(-) create mode 100644 src/Common/ObjectStorageKey.cpp create mode 100644 src/Common/ObjectStorageKey.h create mode 100644 tests/integration/test_remote_blobs_naming/__init__.py create mode 100644 tests/integration/test_remote_blobs_naming/configs/settings.xml create mode 100644 tests/integration/test_remote_blobs_naming/configs/settings_new.xml create mode 100644 tests/integration/test_remote_blobs_naming/configs/storage_conf.xml create mode 100644 tests/integration/test_remote_blobs_naming/test_backward_compatibility.py diff --git a/docs/en/operations/settings/settings.md 
b/docs/en/operations/settings/settings.md
index e3d8a2bf720..67c2e8a41d3 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4804,3 +4804,10 @@ LIFETIME(MIN 0 MAX 3600)
 LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
 SETTINGS(dictionary_use_async_executor=1, max_threads=8);
 ```
+
+## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
+
+When set to `true`, the metadata files are written with the `VERSION_FULL_OBJECT_KEY` format version. With that format, full object storage key names are written to the metadata files.
+When set to `false`, the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format, only the suffixes of object storage key names are written to the metadata files; the prefix for all object storage key names is set in the configuration files under the `storage_configuration.disks` section.
+
+Default value: `false`.
diff --git a/src/Common/ObjectStorageKey.cpp b/src/Common/ObjectStorageKey.cpp
new file mode 100644
index 00000000000..f88e8f9fa5f
--- /dev/null
+++ b/src/Common/ObjectStorageKey.cpp
@@ -0,0 +1,68 @@
+#include "ObjectStorageKey.h"
+
+#include
+
+#include
+
+namespace fs = std::filesystem;
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+}
+
+const String & ObjectStorageKey::getPrefix() const
+{
+    if (!is_relative)
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "object key has no prefix, key: {}", key);
+
+    return prefix;
+}
+
+const String & ObjectStorageKey::getSuffix() const
+{
+    if (!is_relative)
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "object key has no suffix, key: {}", key);
+    return suffix;
+}
+
+const String & ObjectStorageKey::serialize() const
+{
+    return key;
+}
+
+ObjectStorageKey ObjectStorageKey::createAsRelativeAnyway(String key_)
+{
+    ObjectStorageKey object_key;
+    object_key.suffix = std::move(key_);
+    object_key.key = object_key.suffix;
+    object_key.is_relative = true;
+    return object_key;
+}
+
+ObjectStorageKey ObjectStorageKey::createAsRelative(String prefix_, String suffix_)
+{
+    ObjectStorageKey object_key;
+    object_key.prefix = std::move(prefix_);
+    object_key.suffix = std::move(suffix_);
+
+    if (object_key.prefix.empty())
+        object_key.key = object_key.suffix;
+    else
+        object_key.key = fs::path(object_key.prefix) / object_key.suffix;
+
+    object_key.is_relative = true;
+    return object_key;
+}
+
+ObjectStorageKey ObjectStorageKey::createAsAbsolute(String key_)
+{
+    ObjectStorageKey object_key;
+    object_key.key = std::move(key_);
+    object_key.is_relative = true;
+    return object_key;
+}
+}
diff --git a/src/Common/ObjectStorageKey.h b/src/Common/ObjectStorageKey.h
new file mode 100644
index 00000000000..e10f6a2382e
--- /dev/null
+++ b/src/Common/ObjectStorageKey.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include
+
+#include
+
+namespace DB
+{
+    struct ObjectStorageKey
+    {
+        ObjectStorageKey() = default;
+
+        bool hasPrefix() const { return is_relative; }
+        const String & getPrefix() const;
+        const String & getSuffix() const;
+        const String & serialize() const;
+
+        static ObjectStorageKey createAsRelative(String prefix_, String suffix_);
+        static ObjectStorageKey createAsRelativeAnyway(String key_);
+        static ObjectStorageKey createAsAbsolute(String key_);
+
+    private:
+        String prefix;
+        String suffix;
+        String key;
+        bool is_relative = false;
+    };
+
+}
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 7387c8a791e..8260c41e626 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -288,7 
+288,8 @@ class IColumn; M(Bool, http_write_exception_in_output_format, true, "Write exception in output format to produce valid output. Works with JSON and XML formats.", 0) \ M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ - M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ + M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ + M(Bool, storage_metadata_write_full_object_key, false, "Enable write metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ \ diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index bfb418e1c5e..6911fd86db2 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -302,12 +302,14 @@ public: struct LocalPathWithObjectStoragePaths { std::string local_path; - std::string common_prefix_for_objects; StoredObjects objects; LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} + const std::string & local_path_, + StoredObjects && objects_) + : local_path(local_path_) + , objects(std::move(objects_)) + {} }; virtual void getRemotePathsRecursive(const String &, std::vector &) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 73be834c1bb..400ad4a1678 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -102,9 +102,9 @@ AzureObjectStorage::AzureObjectStorage( data_source_description.is_encrypted = false; } -std::string AzureObjectStorage::generateBlobNameForPath(const std::string & /* path */) +ObjectStorageKey AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { - return getRandomASCIIString(32); + return ObjectStorageKey::createAsRelativeAnyway(getRandomASCIIString(32)); } bool AzureObjectStorage::exists(const StoredObject & object) const @@ -320,18 +320,7 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) auto client_ptr = client.get(); for (const auto & object : objects) { - try - { - auto delete_info = client_ptr->DeleteBlob(object.remote_path); - } - catch (const Azure::Storage::StorageException & e) - { - /// If object doesn't exist... 
- if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) - return; - tryLogCurrentException(__PRETTY_FUNCTION__); - throw; - } + removeObjectIfExists(object); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 5436860818c..8e3d50418d3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -121,7 +121,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - std::string generateBlobNameForPath(const std::string & path) override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; bool isRemote() const override { return true; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index a09befe84a8..7ba9d21db62 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -31,11 +31,12 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context)); - auto metadata_storage = std::make_shared(metadata_disk, ""); + String key_prefix; + auto metadata_storage = std::make_shared(metadata_disk, key_prefix); std::shared_ptr azure_blob_storage_disk = std::make_shared( name, - /* no namespaces */"", + /* no namespaces */ key_prefix, "DiskAzureBlobStorage", std::move(metadata_storage), std::move(azure_object_storage), diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index d94c26f27e8..e459aae190c 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -42,9 +42,9 @@ FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const return cache->createKeyForPath(path); } -std::string CachedObjectStorage::generateBlobNameForPath(const std::string & path) +ObjectStorageKey CachedObjectStorage::generateObjectKeyForPath(const std::string & path) const { - return object_storage->generateBlobNameForPath(path); + return object_storage->generateObjectKeyForPath(path); } ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settings) const diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 925abbc6932..20b3a42540b 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -92,7 +92,7 @@ public: const std::string & getCacheName() const override { return cache_config_name; } - std::string generateBlobNameForPath(const std::string & path) override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; bool isRemote() const override { return object_storage->isRemote(); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index b4b777bd494..c1f053be7c6 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -48,14 +48,14 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() DiskObjectStorage::DiskObjectStorage( 
const String & name_, - const String & object_storage_root_path_, + const String & object_key_prefix_, const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : IDisk(name_, config, config_prefix) - , object_storage_root_path(object_storage_root_path_) + , object_key_prefix(object_key_prefix_) , log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")")) , metadata_storage(std::move(metadata_storage_)) , object_storage(std::move(object_storage_)) @@ -80,7 +80,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: { try { - paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path)); + paths_map.emplace_back(local_path, getStorageObjects(local_path)); } catch (const Exception & e) { @@ -243,9 +243,9 @@ String DiskObjectStorage::getUniqueId(const String & path) const bool DiskObjectStorage::checkUniqueId(const String & id) const { - if (!id.starts_with(object_storage_root_path)) + if (!id.starts_with(object_key_prefix)) { - LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}, Stack {}", id, object_storage_root_path, StackTrace().toString()); + LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}, Stack {}", id, object_key_prefix, StackTrace().toString()); return false; } @@ -470,7 +470,7 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() const auto config_prefix = "storage_configuration.disks." + name; return std::make_shared( getName(), - object_storage_root_path, + object_key_prefix, getName(), metadata_storage, object_storage, @@ -586,7 +586,7 @@ void DiskObjectStorage::restoreMetadataIfNeeded( { metadata_helper->restore(config, config_prefix, context); - auto current_schema_version = metadata_helper->readSchemaVersion(object_storage.get(), object_storage_root_path); + auto current_schema_version = metadata_helper->readSchemaVersion(object_storage.get(), object_key_prefix); if (current_schema_version < DiskObjectStorageRemoteMetadataRestoreHelper::RESTORABLE_SCHEMA_VERSION) metadata_helper->migrateToRestorableSchema(); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index ccd7e807513..66d1b02aea7 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -37,7 +37,7 @@ friend class DiskObjectStorageRemoteMetadataRestoreHelper; public: DiskObjectStorage( const String & name_, - const String & object_storage_root_path_, + const String & object_key_prefix_, const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, @@ -224,7 +224,7 @@ private: String getReadResourceName() const; String getWriteResourceName() const; - const String object_storage_root_path; + const String object_key_prefix; Poco::Logger * log; MetadataStoragePtr metadata_storage; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index dfb84ab386a..5ce6a99c4fd 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -7,6 +7,8 @@ #include #include +#include + namespace DB { @@ -17,44 +19,57 @@ namespace ErrorCodes void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) { - UInt32 version; readIntText(version, buf); - if (version < VERSION_ABSOLUTE_PATHS || version > 
VERSION_INLINE_DATA) + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_FULL_OBJECT_KEY) throw Exception( ErrorCodes::UNKNOWN_FORMAT, "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", - common_metadata_path + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); + metadata_file_path, toString(version), toString(VERSION_FULL_OBJECT_KEY)); assertChar('\n', buf); - UInt32 storage_objects_count; - readIntText(storage_objects_count, buf); + UInt32 keys_count; + readIntText(keys_count, buf); assertChar('\t', buf); + keys_with_meta.resize(keys_count); + readIntText(total_size, buf); assertChar('\n', buf); - storage_objects.resize(storage_objects_count); - for (size_t i = 0; i < storage_objects_count; ++i) + for (UInt32 i = 0; i < keys_count; ++i) { - String object_relative_path; - size_t object_size; + UInt64 object_size; readIntText(object_size, buf); assertChar('\t', buf); - readEscapedString(object_relative_path, buf); - if (version == VERSION_ABSOLUTE_PATHS) - { - if (!object_relative_path.starts_with(object_storage_root_path)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", - object_relative_path, object_storage_root_path, common_metadata_path); - object_relative_path = object_relative_path.substr(object_storage_root_path.size()); - } + keys_with_meta[i].metadata.size_bytes = object_size; + + String key_value; + readEscapedString(key_value, buf); assertChar('\n', buf); - storage_objects[i].relative_path = object_relative_path; - storage_objects[i].metadata.size_bytes = object_size; + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!key_value.starts_with(compatible_key_prefix)) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. 
Path: {}, root path: {}, disk path: {}", + key_value, + compatible_key_prefix, + metadata_file_path); + + keys_with_meta[i].key = ObjectStorageKey::createAsRelative( + compatible_key_prefix, key_value.substr(compatible_key_prefix.size())); + } + else if (version < VERSION_FULL_OBJECT_KEY) + { + keys_with_meta[i].key = ObjectStorageKey::createAsRelative(compatible_key_prefix, key_value); + } + else if (version >= VERSION_FULL_OBJECT_KEY) + { + keys_with_meta[i].key = ObjectStorageKey::createAsAbsolute(key_value); + } } readIntText(ref_count, buf); @@ -73,7 +88,7 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) } } -void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) +void DiskObjectStorageMetadata::deserializeFromString(const String & data) { ReadBufferFromString buf(data); deserialize(buf); @@ -81,21 +96,55 @@ void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const { - writeIntText(VERSION_INLINE_DATA, buf); + /// There are the changes for backward compatibility + /// No new file should be write as VERSION_FULL_OBJECT_KEY until storage_metadata_write_full_object_key feature is enabled + /// However, in case of rollback, once file had been written as VERSION_FULL_OBJECT_KEY + /// it has to be always rewritten as VERSION_FULL_OBJECT_KEY + bool storage_metadata_write_full_object_key = getWriteFullObjectKeySetting(); + + if (version == VERSION_FULL_OBJECT_KEY && !storage_metadata_write_full_object_key) + { + Poco::Logger * logger = &Poco::Logger::get("DiskObjectStorageMetadata"); + LOG_WARNING( + logger, + "Metadata file {} is written with VERSION_FULL_OBJECT_KEY version" + "However storage_metadata_write_full_object_key is off.", + metadata_file_path); + } + + UInt32 write_version = version; + if (storage_metadata_write_full_object_key) + write_version = VERSION_FULL_OBJECT_KEY; + + chassert(write_version >= VERSION_ABSOLUTE_PATHS && write_version <= VERSION_FULL_OBJECT_KEY); + writeIntText(write_version, buf); writeChar('\n', buf); - writeIntText(storage_objects.size(), buf); + writeIntText(keys_with_meta.size(), buf); writeChar('\t', buf); writeIntText(total_size, buf); writeChar('\n', buf); - for (const auto & [object_relative_path, object_metadata] : storage_objects) + for (const auto & [object_key, object_meta] : keys_with_meta) { - writeIntText(object_metadata.size_bytes, buf); + writeIntText(object_meta.size_bytes, buf); writeChar('\t', buf); - writeEscapedString(object_relative_path, buf); - writeChar('\n', buf); + + if (write_version == VERSION_FULL_OBJECT_KEY) + { + /// if the metadata file has VERSION_FULL_OBJECT_KEY version + /// all keys inside are written as absolute paths + writeEscapedString(object_key.serialize(), buf); + writeChar('\n', buf); + } + else + { + /// otherwise keys are written as relative paths + /// therefore keys have to have suffix and prefix + writeEscapedString(object_key.getSuffix(), buf); + writeChar('\n', buf); + } } writeIntText(ref_count, buf); @@ -104,11 +153,6 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const writeBoolText(read_only, buf); writeChar('\n', buf); - /// Metadata version describes the format of the file - /// It determines the possibility of writing and reading a particular set of fields from the file, no matter the fields' values. - /// It should not be dependent on field values. 
- /// We always write inline_data in the file when we declare VERSION_INLINE_DATA as a file version, - /// unless it is impossible to introduce the next version of the format. writeEscapedString(inline_data, buf); writeChar('\n', buf); @@ -117,7 +161,7 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const buf.sync(); } -std::string DiskObjectStorageMetadata::serializeToString() const +String DiskObjectStorageMetadata::serializeToString() const { WriteBufferFromOwnString result; serialize(result, false); @@ -126,20 +170,44 @@ std::string DiskObjectStorageMetadata::serializeToString() const /// Load metadata by path or create empty if `create` flag is set. DiskObjectStorageMetadata::DiskObjectStorageMetadata( - const std::string & common_metadata_path_, - const String & object_storage_root_path_, - const String & metadata_file_path_) - : common_metadata_path(common_metadata_path_) - , object_storage_root_path(object_storage_root_path_) - , metadata_file_path(metadata_file_path_) + String compatible_key_prefix_, + String metadata_file_path_) + : compatible_key_prefix(std::move(compatible_key_prefix_)) + , metadata_file_path(std::move(metadata_file_path_)) { } -void DiskObjectStorageMetadata::addObject(const String & path, size_t size) +void DiskObjectStorageMetadata::addObject(ObjectStorageKey key, size_t size) { + if (!key.hasPrefix()) + { + version = VERSION_FULL_OBJECT_KEY; + + bool storage_metadata_write_full_object_key = getWriteFullObjectKeySetting(); + if (!storage_metadata_write_full_object_key) + { + Poco::Logger * logger = &Poco::Logger::get("DiskObjectStorageMetadata"); + LOG_WARNING( + logger, + "Metadata file {} has at least one key {} without fixed common key prefix." + "That forces using VERSION_FULL_OBJECT_KEY version for that metadata file." + "However storage_metadata_write_full_object_key is off.", + metadata_file_path, + key.serialize()); + } + } + total_size += size; - storage_objects.emplace_back(path, ObjectMetadata{size, {}, {}}); + keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}}); } +bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() +{ +#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD + return Context::getGlobalContextInstance()->getSettings().storage_metadata_write_full_object_key; +#else + return false; +#endif +} } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h index 1abb829c12a..658914b7611 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -13,29 +13,30 @@ struct DiskObjectStorageMetadata { private: /// Metadata file version. - static constexpr uint32_t VERSION_ABSOLUTE_PATHS = 1; - static constexpr uint32_t VERSION_RELATIVE_PATHS = 2; - static constexpr uint32_t VERSION_READ_ONLY_FLAG = 3; - static constexpr uint32_t VERSION_INLINE_DATA = 4; + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; + static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; + static constexpr UInt32 VERSION_INLINE_DATA = 4; + static constexpr UInt32 VERSION_FULL_OBJECT_KEY = 5; /// only for reading data - const std::string & common_metadata_path; + UInt32 version = VERSION_INLINE_DATA; - /// Relative paths of blobs. 
- RelativePathsWithMetadata storage_objects; + /// Absolute paths of blobs + ObjectKeysWithMetadata keys_with_meta; - const std::string object_storage_root_path; + const std::string compatible_key_prefix; /// Relative path to metadata file on local FS. const std::string metadata_file_path; /// Total size of all remote FS (S3, HDFS) objects. - size_t total_size = 0; + UInt64 total_size = 0; /// Number of references (hardlinks) to this metadata file. /// /// FIXME: Why we are tracking it explicitly, without /// info from filesystem???? - uint32_t ref_count = 0; + UInt32 ref_count = 0; /// Flag indicates that file is read only. bool read_only = false; @@ -46,11 +47,11 @@ private: public: DiskObjectStorageMetadata( - const std::string & common_metadata_path_, - const std::string & object_storage_root_path_, - const std::string & metadata_file_path_); + String compatible_key_prefix_, + String metadata_file_path_); + + void addObject(ObjectStorageKey key, size_t size); - void addObject(const std::string & path, size_t size); void deserialize(ReadBuffer & buf); void deserializeFromString(const std::string & data); @@ -58,14 +59,9 @@ public: void serialize(WriteBuffer & buf, bool sync) const; std::string serializeToString() const; - std::string getBlobsCommonPrefix() const + const ObjectKeysWithMetadata & getKeysWithMeta() const { - return object_storage_root_path; - } - - RelativePathsWithMetadata getBlobsRelativePaths() const - { - return storage_objects; + return keys_with_meta; } bool isReadOnly() const @@ -73,12 +69,12 @@ public: return read_only; } - uint32_t getRefCount() const + UInt32 getRefCount() const { return ref_count; } - uint64_t getTotalSizeBytes() const + UInt64 getTotalSizeBytes() const { return total_size; } @@ -112,6 +108,8 @@ public: { return inline_data; } + + static bool getWriteFullObjectKeySetting(); }; using DiskObjectStorageMetadataPtr = std::unique_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 0b2d95fff70..33b98cd328c 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -34,7 +34,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::createFileOperationObject( const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { const String relative_path = "operations/r" + revisionToString(revision) + operation_log_suffix + "-" + operation_name; - StoredObject object(fs::path(disk->object_storage_root_path) / relative_path); + StoredObject object(fs::path(disk->object_key_prefix) / relative_path); auto buf = disk->object_storage->writeObject(object, WriteMode::Rewrite, metadata); buf->write('0'); buf->finalize(); @@ -52,8 +52,8 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::findLastRevision() LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); const auto & object_storage = disk->object_storage; - StoredObject revision_object{disk->object_storage_root_path + "r" + revision_prefix}; - StoredObject revision_operation_object{disk->object_storage_root_path + "operations/r" + revision_prefix}; + StoredObject revision_object{disk->object_key_prefix + "r" + revision_prefix}; + StoredObject revision_operation_object{disk->object_key_prefix + "operations/r" + revision_prefix}; /// Check file or operation with such revision prefix exists. 
if (object_storage->exists(revision_object) || object_storage->exists(revision_operation_object)) @@ -80,7 +80,7 @@ int DiskObjectStorageRemoteMetadataRestoreHelper::readSchemaVersion(IObjectStora void DiskObjectStorageRemoteMetadataRestoreHelper::saveSchemaVersion(const int & version) const { - StoredObject object{fs::path(disk->object_storage_root_path) / SCHEMA_VERSION_OBJECT}; + StoredObject object{fs::path(disk->object_key_prefix) / SCHEMA_VERSION_OBJECT}; auto buf = disk->object_storage->writeObject(object, WriteMode::Rewrite, /* attributes= */ {}, /* buf_size= */ DBMS_DEFAULT_BUFFER_SIZE, write_settings); writeIntText(version, *buf); @@ -187,7 +187,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restore(const Poco::Util::Abs try { RestoreInformation information; - information.source_path = disk->object_storage_root_path; + information.source_path = disk->object_key_prefix; information.source_namespace = disk->object_storage->getObjectsNamespace(); readRestoreInformation(information); @@ -201,11 +201,11 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restore(const Poco::Util::Abs { /// In this case we need to additionally cleanup S3 from objects with later revision. /// Will be simply just restore to different path. - if (information.source_path == disk->object_storage_root_path && information.revision != LATEST_REVISION) + if (information.source_path == disk->object_key_prefix && information.revision != LATEST_REVISION) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Restoring to the same bucket and path is allowed if revision is latest (0)"); /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != disk->object_storage_root_path && disk->object_storage_root_path.starts_with(information.source_path)) + if (information.source_path != disk->object_key_prefix && disk->object_key_prefix.starts_with(information.source_path)) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk"); @@ -224,7 +224,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restore(const Poco::Util::Abs LOG_INFO(disk->log, "Removing old metadata..."); - bool cleanup_s3 = information.source_path != disk->object_storage_root_path; + bool cleanup_s3 = information.source_path != disk->object_key_prefix; for (const auto & root : data_roots) if (disk->exists(root)) disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); @@ -424,18 +424,17 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( continue; disk->createDirectories(directoryPath(path)); - auto relative_key = shrinkKey(source_path, key); - auto full_path = fs::path(disk->object_storage_root_path) / relative_key; + auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key)); StoredObject object_from{key}; - StoredObject object_to{fs::path(disk->object_storage_root_path) / relative_key}; + StoredObject object_to{object_key.serialize()}; /// Copy object if we restore to different bucket / path. 
- if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->object_storage_root_path != source_path) + if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->object_key_prefix != source_path) source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage); auto tx = disk->metadata_storage->createTransaction(); - tx->addBlobToMetadata(path, relative_key, meta.size_bytes); + tx->addBlobToMetadata(path, object_key, meta.size_bytes); tx->commit(); LOG_TRACE(disk->log, "Restored file {}", path); @@ -464,7 +463,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject { /// Enable recording file operations if we restore to different bucket / path. bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() - || disk->object_storage_root_path != restore_information.source_path; + || disk->object_key_prefix != restore_information.source_path; std::set renames; auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const RelativePathsWithMetadata & objects) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 66ee2e746b4..8bcb2cf06bb 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int BAD_FILE_TYPE; extern const int FILE_ALREADY_EXISTS; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int LOGICAL_ERROR; } DiskObjectStorageTransaction::DiskObjectStorageTransaction( @@ -511,12 +512,12 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation for (const auto & object_from : source_blobs) { - std::string blob_name = object_storage.generateBlobNameForPath(to_path); - auto object_to = StoredObject(fs::path(metadata_storage.getObjectStorageRootPath()) / blob_name); + auto object_key = object_storage.generateObjectKeyForPath(to_path); + auto object_to = StoredObject(object_key.serialize()); object_storage.copyObject(object_from, object_to, read_settings, write_settings); - tx->addBlobToMetadata(to_path, blob_name, object_from.bytes_size); + tx->addBlobToMetadata(to_path, object_key, object_from.bytes_size); created_objects.push_back(object_to); } @@ -663,46 +664,53 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile const WriteSettings & settings, bool autocommit) { - String blob_name; + auto object_key = object_storage.generateObjectKeyForPath(path); std::optional object_attributes; - blob_name = object_storage.generateBlobNameForPath(path); if (metadata_helper) { + if (!object_key.hasPrefix()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "metadata helper is not supported with abs paths"); + auto revision = metadata_helper->revision_counter + 1; metadata_helper->revision_counter++; object_attributes = { {"path", path} }; - blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; + + object_key = ObjectStorageKey::createAsRelative( + object_key.getPrefix(), + "r" + revisionToString(revision) + "-file-" + object_key.getSuffix()); } - auto object = StoredObject(fs::path(metadata_storage.getObjectStorageRootPath()) / blob_name); - auto write_operation = std::make_unique(object_storage, metadata_storage, object); + /// seems ok + 
auto object = StoredObject(object_key.serialize()); std::function create_metadata_callback; if (autocommit) { - create_metadata_callback = [tx = shared_from_this(), mode, path, blob_name](size_t count) + create_metadata_callback = [tx = shared_from_this(), mode, path, object_key](size_t count) { if (mode == WriteMode::Rewrite) { - // Otherwise we will produce lost blobs which nobody points to + /// Otherwise we will produce lost blobs which nobody points to /// WriteOnce storages are not affected by the issue if (!tx->object_storage.isWriteOnce() && tx->metadata_storage.exists(path)) tx->object_storage.removeObjectsIfExist(tx->metadata_storage.getStorageObjects(path)); - tx->metadata_transaction->createMetadataFile(path, blob_name, count); + tx->metadata_transaction->createMetadataFile(path, object_key, count); } else - tx->metadata_transaction->addBlobToMetadata(path, blob_name, count); + tx->metadata_transaction->addBlobToMetadata(path, object_key, count); tx->metadata_transaction->commit(); }; } else { - create_metadata_callback = [object_storage_tx = shared_from_this(), write_op = write_operation.get(), mode, path, blob_name](size_t count) + auto write_operation = std::make_unique(object_storage, metadata_storage, object); + + create_metadata_callback = [object_storage_tx = shared_from_this(), write_op = write_operation.get(), mode, path, object_key](size_t count) { /// This callback called in WriteBuffer finalize method -- only there we actually know /// how many bytes were written. We don't control when this finalize method will be called @@ -714,7 +722,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile /// ... /// buf1->finalize() // shouldn't do anything with metadata operations, just memoize what to do /// tx->commit() - write_op->setOnExecute([object_storage_tx, mode, path, blob_name, count](MetadataTransactionPtr tx) + write_op->setOnExecute([object_storage_tx, mode, path, object_key, count](MetadataTransactionPtr tx) { if (mode == WriteMode::Rewrite) { @@ -726,15 +734,16 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile object_storage_tx->metadata_storage.getStorageObjects(path)); } - tx->createMetadataFile(path, blob_name, count); + tx->createMetadataFile(path, object_key, count); } else - tx->addBlobToMetadata(path, blob_name, count); + tx->addBlobToMetadata(path, object_key, count); }); }; + + operations_to_execute.emplace_back(std::move(write_operation)); } - operations_to_execute.emplace_back(std::move(write_operation)); auto impl = object_storage.writeObject( object, @@ -753,20 +762,27 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) { /// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile(). 
- auto blob_name = object_storage.generateBlobNameForPath(path); + auto object_key = object_storage.generateObjectKeyForPath(path); std::optional object_attributes; if (metadata_helper) { + if (!object_key.hasPrefix()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "metadata helper is not supported with abs paths"); + auto revision = metadata_helper->revision_counter + 1; metadata_helper->revision_counter++; object_attributes = { {"path", path} }; - blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; + + object_key = ObjectStorageKey::createAsRelative( + object_key.getPrefix(), + "r" + revisionToString(revision) + "-file-" + object_key.getSuffix()); } - auto object = StoredObject(fs::path(metadata_storage.getObjectStorageRootPath()) / blob_name); + /// seems ok + auto object = StoredObject(object_key.serialize()); auto write_operation = std::make_unique(object_storage, metadata_storage, object); operations_to_execute.emplace_back(std::move(write_operation)); @@ -788,10 +804,10 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( if (!object_storage.isWriteOnce() && metadata_storage.exists(path)) object_storage.removeObjectsIfExist(metadata_storage.getStorageObjects(path)); - metadata_transaction->createMetadataFile(path, blob_name, object_size); + metadata_transaction->createMetadataFile(path, std::move(object_key), object_size); } else - metadata_transaction->addBlobToMetadata(path, blob_name, object_size); + metadata_transaction->addBlobToMetadata(path, std::move(object_key), object_size); } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 5eca98aa494..662b20f4d31 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -28,9 +28,10 @@ void HDFSObjectStorage::startup() { } -std::string HDFSObjectStorage::generateBlobNameForPath(const std::string & /* path */) +ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { - return getRandomASCIIString(32); + /// what ever data_source_description.description value is, consider that key as relative key + return ObjectStorageKey::createAsRelative(data_source_description.description, getRandomASCIIString(32)); } bool HDFSObjectStorage::exists(const StoredObject & object) const diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 8d770c12d8f..fe0893f963b 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -114,7 +114,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - std::string generateBlobNameForPath(const std::string & path) override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; bool isRemote() const override { return true; } diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 6b75e157dee..9e5078736d2 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -126,10 +126,10 @@ public: virtual void createEmptyMetadataFile(const std::string & path) = 0; /// Create metadata file on paths with content (blob_name, size_in_bytes) - virtual void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) = 0; + virtual void createMetadataFile(const std::string & path, 
ObjectStorageKey key, uint64_t size_in_bytes) = 0; /// Add to new blob to metadata file (way to implement appends) - virtual void addBlobToMetadata(const std::string & /* path */, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) + virtual void addBlobToMetadata(const std::string & /* path */, ObjectStorageKey /* key */, uint64_t /* size_in_bytes */) { throwNotImplemented(); } @@ -221,8 +221,6 @@ public: /// object_storage_path is absolute. virtual StoredObjects getStorageObjects(const std::string & path) const = 0; - virtual std::string getObjectStorageRootPath() const = 0; - private: [[noreturn]] static void throwNotImplemented() { diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 3c77de8f5b7..78fbdcaddfa 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -95,21 +95,4 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings return settings; } -std::string IObjectStorage::generateBlobNameForPath(const std::string & /* path */) -{ - /// Path to store the new S3 object. - - /// Total length is 32 a-z characters for enough randomness. - /// First 3 characters are used as a prefix for - /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/ - - constexpr size_t key_name_total_size = 32; - constexpr size_t key_name_prefix_size = 3; - - /// Path to store new S3 object. - return fmt::format("{}/{}", - getRandomASCIIString(key_name_prefix_size), - getRandomASCIIString(key_name_total_size - key_name_prefix_size)); -} - } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 032795b380f..1918c197577 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +35,7 @@ using ObjectAttributes = std::map; struct ObjectMetadata { - uint64_t size_bytes; + uint64_t size_bytes = 0; std::optional last_modified; std::optional attributes; }; @@ -43,16 +43,31 @@ struct ObjectMetadata struct RelativePathWithMetadata { String relative_path; - ObjectMetadata metadata{}; + ObjectMetadata metadata; RelativePathWithMetadata() = default; - RelativePathWithMetadata(const String & relative_path_, const ObjectMetadata & metadata_) - : relative_path(relative_path_), metadata(metadata_) + RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_) + : relative_path(std::move(relative_path_)) + , metadata(std::move(metadata_)) + {} +}; + +struct ObjectKeyWithMetadata +{ + ObjectStorageKey key; + ObjectMetadata metadata; + + ObjectKeyWithMetadata() = default; + + ObjectKeyWithMetadata(ObjectStorageKey key_, ObjectMetadata metadata_) + : key(std::move(key_)) + , metadata(std::move(metadata_)) {} }; using RelativePathsWithMetadata = std::vector; +using ObjectKeysWithMetadata = std::vector; class IObjectStorageIterator; using ObjectStorageIteratorPtr = std::shared_ptr; @@ -176,7 +191,7 @@ public: /// Generate blob name for passed absolute local path. /// Path can be generated either independently or based on `path`. 
- virtual std::string generateBlobNameForPath(const std::string & path); + virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; /// Get unique id for passed absolute path in object storage. virtual std::string getUniqueId(const std::string & path) const { return path; } diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index cc53df956c6..4cf3c23d5a6 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -24,8 +24,9 @@ namespace ErrorCodes extern const int CANNOT_UNLINK; } -LocalObjectStorage::LocalObjectStorage() - : log(&Poco::Logger::get("LocalObjectStorage")) +LocalObjectStorage::LocalObjectStorage(String key_prefix_) + : key_prefix(std::move(key_prefix_)) + , log(&Poco::Logger::get("LocalObjectStorage")) { data_source_description.type = DataSourceType::Local; if (auto block_device_id = tryGetBlockDeviceId("/"); block_device_id.has_value()) @@ -200,10 +201,10 @@ void LocalObjectStorage::applyNewSettings( { } -std::string LocalObjectStorage::generateBlobNameForPath(const std::string & /* path */) +ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { constexpr size_t key_name_total_size = 32; - return getRandomASCIIString(key_name_total_size); + return ObjectStorageKey::createAsRelative(key_prefix, getRandomASCIIString(key_name_total_size)); } } diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index aa3a68731e4..263eb3f7832 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -16,7 +16,7 @@ namespace DB class LocalObjectStorage : public IObjectStorage { public: - LocalObjectStorage(); + LocalObjectStorage(String key_prefix_); DataSourceDescription getDataSourceDescription() const override { return data_source_description; } @@ -78,13 +78,14 @@ public: const std::string & config_prefix, ContextPtr context) override; - std::string generateBlobNameForPath(const std::string & path) override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; bool isRemote() const override { return false; } ReadSettings patchSettings(const ReadSettings & read_settings) const override; private: + String key_prefix; Poco::Logger * log; DataSourceDescription data_source_description; }; diff --git a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp index eb9039fed44..0b2c71fa09d 100644 --- a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp @@ -20,23 +20,25 @@ void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_acce ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { - String path; + String object_key_prefix; UInt64 keep_free_space_bytes; - loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes); - fs::create_directories(path); + loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes); + /// keys are mapped to the fs, object_key_prefix is a directory also + fs::create_directories(object_key_prefix); String type = config.getString(config_prefix + ".type"); chassert(type == "local_blob_storage"); - std::shared_ptr local_storage = std::make_shared(); + 
std::shared_ptr local_storage = std::make_shared(object_key_prefix); MetadataStoragePtr metadata_storage; auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - metadata_storage = std::make_shared(metadata_disk, path); + metadata_storage = std::make_shared(metadata_disk, object_key_prefix); auto disk = std::make_shared( - name, path, "Local", metadata_storage, local_storage, config, config_prefix); + name, object_key_prefix, "Local", metadata_storage, local_storage, config, config_prefix); disk->startup(context, global_skip_access_check); return disk; + }; factory.registerDiskType("local_blob_storage", creator); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 53428c2f6e1..91234a3fa05 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -15,9 +15,9 @@ namespace ErrorCodes extern const int FS_METADATA_ERROR; } -MetadataStorageFromDisk::MetadataStorageFromDisk(DiskPtr disk_, const std::string & object_storage_root_path_) +MetadataStorageFromDisk::MetadataStorageFromDisk(DiskPtr disk_, String compatible_key_prefix_) : disk(disk_) - , object_storage_root_path(object_storage_root_path_) + , compatible_key_prefix(compatible_key_prefix_) { } @@ -85,7 +85,7 @@ std::string MetadataStorageFromDisk::readInlineDataToString(const std::string & DiskObjectStorageMetadataPtr MetadataStorageFromDisk::readMetadataUnlocked(const std::string & path, std::shared_lock &) const { - auto metadata = std::make_unique(disk->getPath(), object_storage_root_path, path); + auto metadata = std::make_unique(compatible_key_prefix, path); auto str = readFileToString(path); metadata->deserializeFromString(str); return metadata; @@ -93,7 +93,7 @@ DiskObjectStorageMetadataPtr MetadataStorageFromDisk::readMetadataUnlocked(const DiskObjectStorageMetadataPtr MetadataStorageFromDisk::readMetadataUnlocked(const std::string & path, std::unique_lock &) const { - auto metadata = std::make_unique(disk->getPath(), object_storage_root_path, path); + auto metadata = std::make_unique(compatible_key_prefix, path); auto str = readFileToString(path); metadata->deserializeFromString(str); return metadata; @@ -135,21 +135,16 @@ MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() StoredObjects MetadataStorageFromDisk::getStorageObjects(const std::string & path) const { auto metadata = readMetadata(path); + const auto & keys_with_meta = metadata->getKeysWithMeta(); - auto object_storage_relative_paths = metadata->getBlobsRelativePaths(); /// Relative paths. - - StoredObjects object_storage_paths; - object_storage_paths.reserve(object_storage_relative_paths.size()); - - /// Relative paths -> absolute. 
- for (auto & [object_relative_path, object_meta] : object_storage_relative_paths) + StoredObjects objects; + objects.reserve(keys_with_meta.size()); + for (const auto & [object_key, object_meta] : keys_with_meta) { - auto object_path = fs::path(metadata->getBlobsCommonPrefix()) / object_relative_path; - StoredObject object{ object_path, object_meta.size_bytes, path }; - object_storage_paths.push_back(object); + objects.emplace_back(object_key.serialize(), object_meta.size_bytes, path); } - return object_storage_paths; + return objects; } uint32_t MetadataStorageFromDisk::getHardlinkCount(const std::string & path) const @@ -253,8 +248,7 @@ void MetadataStorageFromDiskTransaction::writeInlineDataToFile( const std::string & path, const std::string & data) { - auto metadata = std::make_unique( - metadata_storage.getDisk()->getPath(), metadata_storage.getObjectStorageRootPath(), path); + auto metadata = std::make_unique(metadata_storage.compatible_key_prefix, path); metadata->setInlineData(data); writeStringToFile(path, metadata->serializeToString()); } @@ -318,26 +312,23 @@ void MetadataStorageFromDiskTransaction::setReadOnly(const std::string & path) void MetadataStorageFromDiskTransaction::createEmptyMetadataFile(const std::string & path) { - auto metadata = std::make_unique( - metadata_storage.getDisk()->getPath(), metadata_storage.getObjectStorageRootPath(), path); + auto metadata = std::make_unique(metadata_storage.compatible_key_prefix, path); writeStringToFile(path, metadata->serializeToString()); } -void MetadataStorageFromDiskTransaction::createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) +void MetadataStorageFromDiskTransaction::createMetadataFile(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) { - DiskObjectStorageMetadataPtr metadata = std::make_unique( - metadata_storage.getDisk()->getPath(), metadata_storage.getObjectStorageRootPath(), path); - - metadata->addObject(blob_name, size_in_bytes); + auto metadata = std::make_unique(metadata_storage.compatible_key_prefix, path); + metadata->addObject(std::move(object_key), size_in_bytes); auto data = metadata->serializeToString(); if (!data.empty()) addOperation(std::make_unique(path, *metadata_storage.getDisk(), data)); } -void MetadataStorageFromDiskTransaction::addBlobToMetadata(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) +void MetadataStorageFromDiskTransaction::addBlobToMetadata(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) { - addOperation(std::make_unique(path, blob_name, metadata_storage.object_storage_root_path, size_in_bytes, *metadata_storage.disk, metadata_storage)); + addOperation(std::make_unique(path, std::move(object_key), size_in_bytes, *metadata_storage.disk, metadata_storage)); } UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index b518f5e3622..4116659ab9a 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -22,12 +22,11 @@ private: friend class MetadataStorageFromDiskTransaction; mutable SharedMutex metadata_mutex; - DiskPtr disk; - std::string object_storage_root_path; + String compatible_key_prefix; public: - MetadataStorageFromDisk(DiskPtr disk_, const std::string & object_storage_root_path_); + 
MetadataStorageFromDisk(DiskPtr disk_, String compatible_key_prefix); MetadataTransactionPtr createTransaction() override; @@ -67,8 +66,6 @@ public: StoredObjects getStorageObjects(const std::string & path) const override; - std::string getObjectStorageRootPath() const override { return object_storage_root_path; } - DiskObjectStorageMetadataPtr readMetadata(const std::string & path) const; DiskObjectStorageMetadataPtr readMetadataUnlocked(const std::string & path, std::unique_lock & lock) const; @@ -104,9 +101,9 @@ public: void createEmptyMetadataFile(const std::string & path) override; - void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; + void createMetadataFile(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) override; - void addBlobToMetadata(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; + void addBlobToMetadata(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) override; void setLastModified(const std::string & path, const Poco::Timestamp & timestamp) override; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 78e8764f8fc..1357acdfc66 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -294,9 +294,9 @@ void AddBlobOperation::execute(std::unique_lock & metadata_lock) if (metadata_storage.exists(path)) metadata = metadata_storage.readMetadataUnlocked(path, metadata_lock); else - metadata = std::make_unique(disk.getPath(), root_path, path); + metadata = std::make_unique(disk.getPath(), path); - metadata->addObject(blob_name, size_in_bytes); + metadata->addObject(object_key, size_in_bytes); write_operation = std::make_unique(path, disk, metadata->serializeToString()); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index ccb77f6ae7b..e8fda177b95 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -216,14 +216,12 @@ struct AddBlobOperation final : public IMetadataOperation { AddBlobOperation( const std::string & path_, - const std::string & blob_name_, - const std::string & root_path_, + ObjectStorageKey object_key_, uint64_t size_in_bytes_, IDisk & disk_, const MetadataStorageFromDisk & metadata_storage_) : path(path_) - , blob_name(blob_name_) - , root_path(root_path_) + , object_key(std::move(object_key_)) , size_in_bytes(size_in_bytes_) , disk(disk_) , metadata_storage(metadata_storage_) @@ -235,8 +233,7 @@ struct AddBlobOperation final : public IMetadataOperation private: std::string path; - std::string blob_name; - std::string root_path; + ObjectStorageKey object_key; uint64_t size_in_bytes; IDisk & disk; const MetadataStorageFromDisk & metadata_storage; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 022ff86df50..5f1d1f7f7f2 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -12,9 +12,9 @@ namespace DB 
MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage( ObjectStoragePtr object_storage_, - const std::string & object_storage_root_path_) + String storage_path_prefix_) : object_storage(object_storage_) - , object_storage_root_path(object_storage_root_path_) + , storage_path_prefix(std::move(storage_path_prefix_)) { } @@ -25,19 +25,15 @@ MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction( const std::string & MetadataStorageFromPlainObjectStorage::getPath() const { - return object_storage_root_path; -} -std::filesystem::path MetadataStorageFromPlainObjectStorage::getAbsolutePath(const std::string & path) const -{ - return fs::path(object_storage_root_path) / path; + return storage_path_prefix; } bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) const { /// NOTE: exists() cannot be used here since it works only for existing /// key, and does not work for some intermediate path. - std::string abs_path = getAbsolutePath(path); - return object_storage->existsOrHasAnyChild(abs_path); + auto object_key = object_storage->generateObjectKeyForPath(path); + return object_storage->existsOrHasAnyChild(object_key.serialize()); } bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) const @@ -48,7 +44,8 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const { - std::string directory = getAbsolutePath(path); + auto object_key = object_storage->generateObjectKeyForPath(path); + std::string directory = object_key.serialize(); if (!directory.ends_with('/')) directory += '/'; @@ -59,8 +56,8 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const { - RelativePathsWithMetadata children; - auto metadata = object_storage->tryGetObjectMetadata(getAbsolutePath(path)); + auto object_key = object_storage->generateObjectKeyForPath(path); + auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize()); if (metadata) return metadata->size_bytes; return 0; @@ -68,12 +65,14 @@ uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const { - RelativePathsWithMetadata files; - std::string abs_path = getAbsolutePath(path); - if (!abs_path.ends_with('/')) - abs_path += '/'; + auto object_key = object_storage->generateObjectKeyForPath(path); - object_storage->listObjects(abs_path, files, 0); + RelativePathsWithMetadata files; + std::string abs_key = object_key.serialize(); + if (!abs_key.ends_with('/')) + abs_key += '/'; + + object_storage->listObjects(abs_key, files, 0); std::vector result; for (const auto & path_size : files) @@ -84,8 +83,8 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co std::unordered_set duplicates_filter; for (auto & row : result) { - chassert(row.starts_with(abs_path)); - row.erase(0, abs_path.size()); + chassert(row.starts_with(abs_key)); + row.erase(0, abs_key.size()); auto slash_pos = row.find_first_of('/'); if (slash_pos != std::string::npos) row.erase(slash_pos, row.size() - slash_pos); @@ -105,10 +104,9 @@ DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(con StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std::string & path) const { - std::string blob_name = 
object_storage->generateBlobNameForPath(path); - size_t object_size = getFileSize(blob_name); - auto object = StoredObject(getAbsolutePath(blob_name), object_size, path); - return {std::move(object)}; + size_t object_size = getFileSize(path); + auto object_key = object_storage->generateObjectKeyForPath(path); + return {StoredObject(object_key.serialize(), object_size, path)}; } const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const @@ -118,7 +116,8 @@ const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getSt void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) { - auto object = StoredObject(metadata_storage.getAbsolutePath(path)); + auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path); + auto object = StoredObject(object_key.serialize()); metadata_storage.object_storage->removeObject(object); } @@ -131,7 +130,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive( /// Noop. It is an Object Storage not a filesystem. } void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( - const std::string &, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) + const std::string &, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) { /// Noop, local metadata files is only one file, it is the metadata file itself. } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index bd068c1362f..2ef823d07a4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -29,12 +29,10 @@ private: friend class MetadataStorageFromPlainObjectStorageTransaction; ObjectStoragePtr object_storage; - std::string object_storage_root_path; + String storage_path_prefix; public: - MetadataStorageFromPlainObjectStorage( - ObjectStoragePtr object_storage_, - const std::string & object_storage_root_path_); + MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); MetadataTransactionPtr createTransaction() override; @@ -56,8 +54,6 @@ public: StoredObjects getStorageObjects(const std::string & path) const override; - std::string getObjectStorageRootPath() const override { return object_storage_root_path; } - Poco::Timestamp getLastModified(const std::string & /* path */) const override { /// Required by MergeTree @@ -71,9 +67,6 @@ public: bool supportsChmod() const override { return false; } bool supportsStat() const override { return false; } - -private: - std::filesystem::path getAbsolutePath(const std::string & path) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction @@ -89,14 +82,14 @@ public: const IMetadataStorage & getStorageForNonTransactionalReads() const override; - void addBlobToMetadata(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; + void addBlobToMetadata(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) override; void createEmptyMetadataFile(const std::string & /* path */) override { /// No metadata, no need to create anything. 
} - void createMetadataFile(const std::string & /* path */, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) override + void createMetadataFile(const std::string & /* path */, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) override { /// Noop } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 8f020e0d1ac..b36185249af 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -127,7 +128,10 @@ private: result = !objects.empty(); for (const auto & object : objects) - batch.emplace_back(object.GetKey(), ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}); + batch.emplace_back( + object.GetKey(), + ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}} + ); if (result) request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); @@ -293,7 +297,12 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet break; for (const auto & object : objects) - children.emplace_back(object.GetKey(), ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}); + children.emplace_back( + object.GetKey(), + ObjectMetadata{ + static_cast(object.GetSize()), + Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), + {}}); if (max_keys) { @@ -524,12 +533,33 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( return std::make_unique( std::move(new_client), std::move(new_s3_settings), version_id, s3_capabilities, new_namespace, - endpoint); + endpoint, object_key_prefix); } S3ObjectStorage::Clients::Clients(std::shared_ptr client_, const S3ObjectStorageSettings & settings) : client(std::move(client_)), client_with_long_timeout(client->clone(std::nullopt, settings.request_settings.long_request_timeout_ms)) {} +ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string &) const +{ + /// Path to store the new S3 object. + + /// Total length is 32 a-z characters for enough randomness. + /// First 3 characters are used as a prefix for + /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/ + + constexpr size_t key_name_total_size = 32; + constexpr size_t key_name_prefix_size = 3; + + /// Path to store new S3 object. 
+ String key = fmt::format("{}/{}", + getRandomASCIIString(key_name_prefix_size), + getRandomASCIIString(key_name_total_size - key_name_prefix_size)); + + /// what ever key_prefix value is, consider that key as relative + return ObjectStorageKey::createAsRelative(object_key_prefix, key); +} + + } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 6e516b39c88..b1b3fb22366 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -59,8 +59,10 @@ private: String version_id_, const S3Capabilities & s3_capabilities_, String bucket_, - String connection_string) - : bucket(bucket_) + String connection_string, + String object_key_prefix_) + : bucket(std::move(bucket_)) + , object_key_prefix(std::move(object_key_prefix_)) , clients(std::make_unique(std::move(client_), *s3_settings_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) @@ -170,13 +172,17 @@ public: bool supportParallelWrite() const override { return true; } + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + private: void setNewSettings(std::unique_ptr && s3_settings_); void removeObjectImpl(const StoredObject & object, bool if_exists); void removeObjectsImpl(const StoredObjects & objects, bool if_exists); +private: std::string bucket; + String object_key_prefix; MultiVersion clients; MultiVersion s3_settings; @@ -195,7 +201,11 @@ private: class S3PlainObjectStorage : public S3ObjectStorage { public: - std::string generateBlobNameForPath(const std::string & path) override { return path; } + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + { + return ObjectStorageKey::createAsRelative(object_key_prefix, path); + } + std::string getName() const override { return "S3PlainObjectStorage"; } template diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 91f647cbd8b..663d8b777e8 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -126,12 +126,15 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) if (config.getBool(config_prefix + ".send_metadata", false)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain does not supports send_metadata"); - s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + s3_storage = std::make_shared( + std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, uri.key); + metadata_storage = std::make_shared(s3_storage, uri.key); } else { - s3_storage = std::make_shared(std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + s3_storage = std::make_shared( + std::move(client), std::move(settings), uri.version_id, s3_capabilities, uri.bucket, uri.endpoint, uri.key); auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); metadata_storage = std::make_shared(metadata_disk, uri.key); } diff --git a/src/Disks/ObjectStorages/StoredObject.h b/src/Disks/ObjectStorages/StoredObject.h index 8afbb116a83..4a03743e310 100644 --- a/src/Disks/ObjectStorages/StoredObject.h +++ b/src/Disks/ObjectStorages/StoredObject.h @@ -1,8 +1,11 @@ #pragma once +#include + +#include + #include #include -#include namespace DB @@ -11,20 +14,32 @@ namespace DB /// Object 
metadata: path, size, path_key_for_cache. struct StoredObject { - std::string remote_path; - std::string local_path; /// or equivalent "metadata_path" + String remote_path; /// abs path + String local_path; /// or equivalent "metadata_path" uint64_t bytes_size = 0; StoredObject() = default; - explicit StoredObject( - const std::string & remote_path_, - uint64_t bytes_size_ = 0, - const std::string & local_path_ = "") - : remote_path(remote_path_) - , local_path(local_path_) - , bytes_size(bytes_size_) {} + explicit StoredObject(String remote_path_) + : remote_path(std::move(remote_path_)) + {} + + StoredObject( + String remote_path_, + uint64_t bytes_size_) + : remote_path(std::move(remote_path_)) + , bytes_size(bytes_size_) + {} + + StoredObject( + String remote_path_, + uint64_t bytes_size_, + String local_path_) + : remote_path(std::move(remote_path_)) + , local_path(std::move(local_path_)) + , bytes_size(bytes_size_) + {} }; using StoredObjects = std::vector; diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp index fa07ef8590a..2d1ae41eb05 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp @@ -28,7 +28,8 @@ MetadataTransactionPtr MetadataStorageFromStaticFilesWebServer::createTransactio const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const { - return root_path; + static const String no_root; + return no_root; } bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const @@ -96,7 +97,7 @@ std::vector MetadataStorageFromStaticFilesWebServer::listDirectory( for (const auto & [file_path, _] : object_storage.files) { if (file_path.starts_with(path)) - result.push_back(file_path); + result.push_back(file_path); /// It looks more like recursive listing, not sure it is right } return result; } diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h index 96c749ad80c..1b17cac994d 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h @@ -16,12 +16,9 @@ private: using FileType = WebObjectStorage::FileType; const WebObjectStorage & object_storage; - std::string root_path; void assertExists(const std::string & path) const; - void initializeImpl(const String & uri_path, const std::unique_lock &) const; - public: explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_); @@ -43,8 +40,6 @@ public: StoredObjects getStorageObjects(const std::string & path) const override; - std::string getObjectStorageRootPath() const override { return ""; } - struct stat stat(const String & /* path */) const override { return {}; } Poco::Timestamp getLastModified(const std::string & /* path */) const override @@ -80,7 +75,7 @@ public: /// No metadata, no need to create anything. 
} - void createMetadataFile(const std::string & /* path */, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) override + void createMetadataFile(const std::string & /* path */, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) override { /// Noop } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index 089bdb99e71..4bd96825818 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -89,7 +89,10 @@ public: const std::string & config_prefix, ContextPtr context) override; - std::string generateBlobNameForPath(const std::string & path) override { return path; } + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + { + return ObjectStorageKey::createAsRelativeAnyway(path); + } bool isRemote() const override { return true; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index d0115bf84df..2e0703a8df3 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -843,8 +843,10 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( /// We don't have to list bucket, because there is no asterisks. if (key_prefix.size() == blob_path_with_globs.size()) { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata); + auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); + blobs_with_metadata.emplace_back( + blob_path_with_globs, + object_metadata); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata.back()); if (file_progress_callback) @@ -923,8 +925,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() blobs_with_metadata = std::move(new_batch); if (file_progress_callback) { - for (const auto & [_, info] : blobs_with_metadata) + for (const auto & [relative_path, info] : blobs_with_metadata) + { file_progress_callback(FileProgress(0, info.size_bytes)); + } } } @@ -970,7 +974,7 @@ StorageAzureBlobSource::KeysIterator::KeysIterator( ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); if (file_progress_callback) file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(RelativePathWithMetadata{key, object_metadata}); + keys.emplace_back(key, object_metadata); } if (outer_blobs) @@ -1114,7 +1118,8 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() QueryPipelineBuilder builder; std::shared_ptr source; std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; + std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files + ? 
tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; if (num_rows_from_cache) { /// We should not return single chunk with all number of rows, diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index eb514d3b3f4..9e63fbcf693 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -65,7 +65,7 @@ Pipe StorageSystemRemoteDataPaths::read( if (disk->supportsCache()) cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache; - for (const auto & [local_path, common_prefox_for_objects, storage_objects] : remote_paths_by_local_path) + for (const auto & [local_path, storage_objects] : remote_paths_by_local_path) { for (const auto & object : storage_objects) { @@ -78,7 +78,9 @@ Pipe StorageSystemRemoteDataPaths::read( col_local_path->insert(local_path); col_remote_path->insert(object.remote_path); col_size->insert(object.bytes_size); - col_namespace->insert(common_prefox_for_objects); + + col_namespace->insertDefault(); + //col_namespace->insert(common_prefox_for_objects); if (cache) { diff --git a/tests/integration/test_remote_blobs_naming/__init__.py b/tests/integration/test_remote_blobs_naming/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_remote_blobs_naming/configs/settings.xml b/tests/integration/test_remote_blobs_naming/configs/settings.xml new file mode 100644 index 00000000000..8e8d870d1ba --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/settings.xml @@ -0,0 +1,10 @@ + + + + + + 1 + 1 + + + diff --git a/tests/integration/test_remote_blobs_naming/configs/settings_new.xml b/tests/integration/test_remote_blobs_naming/configs/settings_new.xml new file mode 100644 index 00000000000..1c5e0b0ee60 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/settings_new.xml @@ -0,0 +1,11 @@ + + + + + + 1 + 1 + 1 + + + diff --git a/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml new file mode 100644 index 00000000000..31c6a3bf968 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/configs/storage_conf.xml @@ -0,0 +1,47 @@ + + + + + test + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/s3_pain_key_prefix + minio + minio123 + true + + + + + + +
+ s3 +
+
+
+ + + +
+ s3_plain +
+
+
+
+
+ + + s3 + +
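The config above defines an ordinary `s3` disk and an `s3_plain` disk, which is exactly the split that the `generateObjectKeyForPath()` overrides earlier in this patch distinguish: the plain disk maps a logical path straight to a key under the disk prefix, while the regular disk draws a random 3-character partition prefix plus a 29-character suffix. The sketch below is only an illustration of that contract, not the ClickHouse classes themselves — `Key` stands in for `ObjectStorageKey`, `randomLowercase()` for `getRandomASCIIString()`, and the example paths are made up.

```cpp
#include <cassert>
#include <random>
#include <string>

/// Toy stand-in for DB::ObjectStorageKey (illustration only): a key is a
/// disk-level prefix plus a per-file suffix, serialized as "prefix/suffix".
struct Key
{
    std::string prefix;
    std::string suffix;

    std::string serialize() const
    {
        if (prefix.empty())
            return suffix;
        return prefix.back() == '/' ? prefix + suffix : prefix + "/" + suffix;
    }
};

/// Random a-z string; the real code uses getRandomASCIIString().
std::string randomLowercase(size_t n)
{
    static thread_local std::mt19937 gen{std::random_device{}()};
    std::uniform_int_distribution<int> dist('a', 'z');
    std::string s(n, 'a');
    for (auto & c : s)
        c = static_cast<char>(dist(gen));
    return s;
}

/// "s3"-style disk: 3 random characters as a key-name partition prefix plus
/// 29 more; the logical path does not influence the remote key at all.
Key generateKeyForS3Disk(const std::string & disk_prefix, const std::string & /* logical_path */)
{
    return {disk_prefix, randomLowercase(3) + "/" + randomLowercase(29)};
}

/// "s3_plain"-style disk: the logical path itself is the suffix, so the bucket
/// layout mirrors the local layout and stays human-readable.
Key generateKeyForS3PlainDisk(const std::string & disk_prefix, const std::string & logical_path)
{
    return {disk_prefix, logical_path};
}

int main()
{
    const std::string path = "store/abc/all_1_1_0/primary.cidx"; // hypothetical part file
    Key random_key = generateKeyForS3Disk("data/", path);
    Key plain_key = generateKeyForS3PlainDisk("data/", path);

    assert(random_key.serialize().size() == 5 + 3 + 1 + 29); // "data/" + 3 chars + '/' + 29 chars
    assert(plain_key.serialize() == "data/" + path);
    return 0;
}
```

The backward-compatibility test that follows pokes at the same boundary from the metadata side: it detaches a part, rewrites its metadata file to carry the full remote key instead of a disk-relative suffix, then checks that the server re-attaches the part and resolves the very same remote object.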
diff --git a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py new file mode 100644 index 00000000000..22582f90ba6 --- /dev/null +++ b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 + +import logging +import pytest + +import os +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "configs/storage_conf.xml", + ], + user_configs=[ + "configs/settings.xml", + ], + with_minio=True, + macros={"replica": "1"}, + with_zookeeper=True, + ) + cluster.add_instance( + "new_node", + main_configs=[ + "configs/storage_conf.xml", + ], + user_configs=[ + "configs/settings_new.xml", + ], + with_minio=True, + macros={"replica": "2"}, + with_zookeeper=True, + ) + cluster.add_instance( + "switching_node", + main_configs=[ + "configs/storage_conf.xml", + ], + user_configs=[ + "configs/settings.xml", + ], + with_minio=True, + with_zookeeper=True, + stay_alive=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + + # Actually, try/finally section is excess in pytest.fixtures + cluster.shutdown() + + +def get_part_path(node, table, part_name): + part_path = node.query( + f"SELECT path FROM system.parts WHERE table = '{table}' and name = '{part_name}'" + ).strip() + + return os.path.normpath(part_path) + + +def get_first_part_name(node, table): + part_name = node.query( + f"SELECT name FROM system.parts WHERE table = '{table}' and active LIMIT 1" + ).strip() + return part_name + + +def read_file(node, file_path): + return node.exec_in_container(["bash", "-c", f"cat {file_path}"]) + + +def write_file(node, file_path, data): + node.exec_in_container(["bash", "-c", f"echo '{data}' > {file_path}"]) + + +def find_keys_for_local_path(node, local_path): + remote = node.query( + f""" + SELECT + remote_path + FROM + system.remote_data_paths + WHERE + concat(path, local_path) = '{local_path}' + """ + ).split("\n") + return [x for x in remote if x] + + +def test_read_new_format(cluster): + node = cluster.instances["node"] + + node.query( + """ + CREATE TABLE test_read_new_format ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + """ + ) + + node.query("INSERT INTO test_read_new_format VALUES (1, 'Hello')") + + part_name = get_first_part_name(node, "test_read_new_format") + part_path = get_part_path(node, "test_read_new_format", part_name) + primary_idx = os.path.join(part_path, "primary.cidx") + + remote = find_keys_for_local_path(node, primary_idx) + assert len(remote) == 1 + remote = remote[0] + + node.query(f"ALTER TABLE test_read_new_format DETACH PART '{part_name}'") + + detached_primary_idx = os.path.join( + os.path.dirname(part_path), "detached", part_name, "primary.cidx" + ) + + # manually change the metadata format and see that CH reads it correctly + meta_data = read_file(node, detached_primary_idx) + lines = meta_data.split("\n") + object_size, object_key = lines[2].split("\t") + assert remote.endswith(object_key), object_key + assert remote != object_key + lines[2] = f"{object_size}\t{remote}" + lines[0] = "5" + + write_file(node, detached_primary_idx, "\n".join(lines)) + + active_count = node.query( + f"SELECT count() FROM system.parts WHERE table = 'test_read_new_format' and active" + ).strip() + assert active_count == "0", 
active_count + + node.query(f"ALTER TABLE test_read_new_format ATTACH PART '{part_name}'") + + active_count = node.query( + f"SELECT count() FROM system.parts WHERE table = 'test_read_new_format' and active" + ).strip() + assert active_count == "1", active_count + + values = node.query(f"SELECT * FROM test_read_new_format").split("\n") + values = [x for x in values if x] + assert values == ["1\tHello"], values + + # part name has changed after attach + part_name = get_first_part_name(node, "test_read_new_format") + part_path = get_part_path(node, "test_read_new_format", part_name) + primary_idx = os.path.join(part_path, "primary.cidx") + + new_remote = find_keys_for_local_path(node, primary_idx) + assert len(new_remote) == 1 + new_remote = new_remote[0] + assert remote == new_remote + + +def test_write_new_format(cluster): + node = cluster.instances["new_node"] + + node.query( + """ + CREATE TABLE test_read_new_format ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + """ + ) + + node.query("INSERT INTO test_read_new_format VALUES (1, 'Hello')") + + part_name = get_first_part_name(node, "test_read_new_format") + part_path = get_part_path(node, "test_read_new_format", part_name) + primary_idx = os.path.join(part_path, "primary.cidx") + + remote = find_keys_for_local_path(node, primary_idx) + assert len(remote) == 1 + remote = remote[0] + + node.query(f"ALTER TABLE test_read_new_format DETACH PART '{part_name}'") + + detached_primary_idx = os.path.join( + os.path.dirname(part_path), "detached", part_name, "primary.cidx" + ) + + # manually change the metadata format and see that CH reads it correctly + meta_data = read_file(node, detached_primary_idx) + lines = meta_data.split("\n") + object_size, object_key = lines[2].split("\t") + assert remote.endswith(object_key), object_key + assert remote == object_key + + +@pytest.mark.parametrize("storage_policy", ["s3", "s3_plain"]) +def test_replicated_merge_tree(cluster, storage_policy): + if storage_policy == "s3_plain": + # MergeTree table doesn't work on s3_plain. 
Rename operation is not implemented + return + + node_old = cluster.instances["node"] + node_new = cluster.instances["new_node"] + + create_table_statement = f""" + CREATE TABLE test_replicated_merge_tree ( + id Int64, + val String + ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_replicated_merge_tree_{storage_policy}', '{{replica}}') + PARTITION BY id + ORDER BY (id, val) + SETTINGS + storage_policy='{storage_policy}' + """ + + node_old.query(create_table_statement) + node_new.query(create_table_statement) + + node_old.query("INSERT INTO test_replicated_merge_tree VALUES (0, 'a')") + node_new.query("INSERT INTO test_replicated_merge_tree VALUES (1, 'b')") + + # node_old have to fetch metadata from node_new and vice versa + node_old.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + node_new.query("SYSTEM SYNC REPLICA test_replicated_merge_tree") + + count_old = node_old.query("SELECT count() FROM test_replicated_merge_tree").strip() + count_new = node_new.query("SELECT count() FROM test_replicated_merge_tree").strip() + + assert count_old == "2" + assert count_new == "2" + + node_old.query("DROP TABLE test_replicated_merge_tree SYNC") + node_new.query("DROP TABLE test_replicated_merge_tree SYNC") + + +def switch_config_write_full_object_key(node, enable): + setting_path = "/etc/clickhouse-server/users.d/settings.xml" + data = read_file(node, setting_path) + + assert data != "" + + is_on = "1" + is_off = "0" + + enable_line = is_off + if enable: + enable_line = is_on + + if is_on in data: + data = data.replace(is_on, enable_line) + else: + data = data.replace(is_off, enable_line) + + write_file(node, setting_path, data) + node.restart_clickhouse() + + +@pytest.mark.parametrize("storage_policy", ["s3", "s3_plain"]) +def test_log_table(cluster, storage_policy): + if storage_policy == "s3_plain": + # Log table doesn't work on s3_plain. 
Rename operation is not implemented + return + + node = cluster.instances["switching_node"] + + create_table_statement = f""" + CREATE TABLE test_log_table ( + id Int64, + val String + ) ENGINE=Log + SETTINGS + storage_policy='{storage_policy}' + """ + + node.query(create_table_statement) + + node.query("INSERT INTO test_log_table VALUES (0, 'a')") + assert "1" == node.query("SELECT count() FROM test_log_table").strip() + + switch_config_write_full_object_key(node, True) + node.query("INSERT INTO test_log_table VALUES (0, 'a')") + assert "2" == node.query("SELECT count() FROM test_log_table").strip() + + switch_config_write_full_object_key(node, False) + node.query("INSERT INTO test_log_table VALUES (1, 'b')") + assert "3" == node.query("SELECT count() FROM test_log_table").strip() + + switch_config_write_full_object_key(node, True) + node.query("INSERT INTO test_log_table VALUES (2, 'c')") + assert "4" == node.query("SELECT count() FROM test_log_table").strip() + + node.query("DROP TABLE test_log_table SYNC") From c1aa49122f8ea76a57770b3f8cc2a32097e7f510 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 2 Nov 2023 15:56:27 +0100 Subject: [PATCH 12/80] Collect addresses in stack traces --- utils/clickhouse-diagnostics/clickhouse-diagnostics | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index 5cacbf1d4d4..8e23cc8d0e1 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -453,10 +453,10 @@ LIMIT 10 SELECT_STACK_TRACES = r"""SELECT '\n' || arrayStringConcat( arrayMap( - x, - y -> concat(x, ': ', y), + x, y, z -> concat(x, ': ', y, ' @ ', z), arrayMap(x -> addressToLine(x), trace), - arrayMap(x -> demangle(addressToSymbol(x)), trace)), + arrayMap(x -> demangle(addressToSymbol(x)), trace), + arrayMap(x -> '0x' || hex(x), trace)), '\n') AS trace FROM system.stack_trace """ From f21e294330735bd0652fbc8ef852bb9b297ab549 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 2 Nov 2023 16:04:54 +0100 Subject: [PATCH 13/80] Update readme --- utils/clickhouse-diagnostics/README.md | 35 +++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/utils/clickhouse-diagnostics/README.md b/utils/clickhouse-diagnostics/README.md index aed5e19ee45..9a86ad535fd 100644 --- a/utils/clickhouse-diagnostics/README.md +++ b/utils/clickhouse-diagnostics/README.md @@ -2574,16 +2574,43 @@ Settings: {} SELECT '\n' || arrayStringConcat( arrayMap( - x, - y -> concat(x, ': ', y), + x, y, z -> concat(x, ': ', y, ' @ ', z), arrayMap(x -> addressToLine(x), trace), - arrayMap(x -> demangle(addressToSymbol(x)), trace)), + arrayMap(x -> demangle(addressToSymbol(x)), trace), + arrayMap(x -> '0x' || hex(x), trace)), '\n') AS trace FROM system.stack_trace ``` **result** ``` -ClickhouseError("Code: 446. DB::Exception: default: Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0: While processing concat('\\n', arrayStringConcat(arrayMap((x, y) -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\\n')) AS trace. 
(FUNCTION_NOT_ALLOWED) (version 21.11.8.4 (official build))",) +Row 1: +────── +trace: +: @ 0x7F6694A91117 +: @ 0x7F6694A93A41 +./build/./contrib/llvm-project/libcxx/src/condition_variable.cpp:47: std::__1::condition_variable::wait(std::__1::unique_lock&) @ 0x16F4A56F +./build/./contrib/llvm-project/libcxx/include/atomic:958: BaseDaemon::waitForTerminationRequest() @ 0x0B85564B +./build/./contrib/llvm-project/libcxx/include/vector:434: DB::Server::main(std::__1::vector, std::__1::allocator>, std::__1::allocator, std::__1::allocator>>> const&) @ 0x0B6644CE +./build/./base/poco/Util/src/Application.cpp:0: Poco::Util::Application::run() @ 0x1489B8A6 +./build/./programs/server/Server.cpp:402: DB::Server::run() @ 0x0B651E91 +./build/./base/poco/Util/src/ServerApplication.cpp:132: Poco::Util::ServerApplication::run(int, char**) @ 0x148AF4F1 +./build/./programs/server/Server.cpp:0: mainEntryClickHouseServer(int, char**) @ 0x0B64FA96 +./build/./programs/main.cpp:0: main @ 0x06AB8C92 +: @ 0x7F6694A29D90 +: @ 0x7F6694A29E40 +./build/./programs/clickhouse: _start @ 0x06AB802E + +Row 2: +────── +trace: +: @ 0x7F6694B14A0C +./build/./src/IO/ReadBufferFromFileDescriptor.cpp:0: DB::ReadBufferFromFileDescriptor::readImpl(char*, unsigned long, unsigned long, unsigned long) @ 0x0B622EAB +./build/./src/IO/ReadBufferFromFileDescriptor.cpp:126: DB::ReadBufferFromFileDescriptor::nextImpl() @ 0x0B6231A0 +./build/./src/IO/ReadBuffer.h:70: SignalListener::run() @ 0x0B85631D +./build/./base/poco/Foundation/include/Poco/SharedPtr.h:139: Poco::ThreadImpl::runnableEntry(void*) @ 0x149CA102 +: @ 0x7F6694A94AC3 +: @ 0x7F6694B26A40 + ``` #### uname **command** From 0ce1560e5d6e4d4703d539cee412db408b73b95f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Nov 2023 18:30:32 +0100 Subject: [PATCH 14/80] retry if table replica being restarted --- src/Interpreters/DDLWorker.cpp | 7 ++++++- src/Interpreters/InterpreterSystemQuery.cpp | 5 +++++ src/Processors/Chunk.cpp | 2 +- src/Storages/IStorage.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index da46aad0329..39b0ee0b814 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -513,6 +513,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep /// get the same exception again. So we return false only for several special exception codes, /// and consider query as executed with status "failed" and return true in other cases. bool no_sense_to_retry = e.code() != ErrorCodes::KEEPER_EXCEPTION && + e.code() != ErrorCodes::UNFINISHED && e.code() != ErrorCodes::NOT_A_LEADER && e.code() != ErrorCodes::TABLE_IS_READ_ONLY && e.code() != ErrorCodes::CANNOT_ASSIGN_ALTER && @@ -793,11 +794,15 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( // Has to get with zk fields to get active replicas field replicated_storage->getStatus(status, true); - // Should return as soon as possible if the table is dropped. 
+ // Should return as soon as possible if the table is dropped or detached, so we will release StoragePtr bool replica_dropped = storage->is_dropped; bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(storage->getStorageID(), context); if (replica_dropped || all_replicas_likely_detached) { + /// We have to exit (and release StoragePtr) if the replica is being restarted, + /// but we can retry in this case, so don't write execution status + if (storage->is_being_restarted) + throw Exception(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permanently"); LOG_WARNING(log, ", task {} will not be executed.", task.entry_name); task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, table is dropped or detached permanently"); return false; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 07a1ae7d170..8fac8deeca5 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -728,6 +728,11 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, if (!table || !dynamic_cast(table.get())) return nullptr; + SCOPE_EXIT({ + if (table) + table->is_being_restarted = false; + }); + table->is_being_restarted = true; table->flushAndShutdown(); { /// If table was already dropped by anyone, an exception will be thrown diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 3839a8963b2..c91df285539 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -73,7 +73,7 @@ void Chunk::checkNumRowsIsConsistent() auto & column = columns[i]; if (column->size() != num_rows) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of rows in Chunk column {}: expected {}, got {}", - column->getName()+ " position " + toString(i), toString(num_rows), toString(column->size())); + column->getName() + " position " + toString(i), toString(num_rows), toString(column->size())); } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 803ab5e92ba..b237745e1d6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -593,6 +593,7 @@ public: std::atomic is_dropped{false}; std::atomic is_detached{false}; + std::atomic is_being_restarted{false}; /// Does table support index for IN sections virtual bool supportsIndexForIn() const { return false; } From 480e58b427bf1a43be7e0fd9c97da7fe9175d8e3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Nov 2023 01:47:50 +0100 Subject: [PATCH 15/80] fix unexpected parts after drop range --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 61 +++++++++++++------ src/Storages/MergeTree/MergeTreeData.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 21 +++++++ src/Storages/System/StorageSystemParts.cpp | 2 + ...86_truncate_and_unexpected_parts.reference | 4 ++ .../02486_truncate_and_unexpected_parts.sql | 27 ++++++++ 7 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference create mode 100644 tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 30c9b19fcbc..58b955e4392 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -54,6 +54,7 @@ enum 
class DataPartRemovalState NON_UNIQUE_OWNERSHIP, NOT_REACHED_REMOVAL_TIME, HAS_SKIPPED_MUTATION_PARENT, + EMPTY_PART_COVERS_OTHER_PARTS, REMOVED, }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index aa0b6b2ff37..2ce6780f16f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2211,6 +2211,15 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) continue; } + /// First remove all covered parts, then remove covering empty part + /// Avoids resurrection of old parts for MergeTree and issues with unexpected parts for Replicated + if (part->rows_count == 0 && !getCoveredOutdatedParts(part, parts_lock).empty()) + { + part->removal_state.store(DataPartRemovalState::EMPTY_PART_COVERS_OTHER_PARTS, std::memory_order_relaxed); + skipped_parts.push_back(part->info); + continue; + } + auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); bool reached_removal_time = part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds(); if ((reached_removal_time && !has_skipped_mutation_parent(part)) @@ -2627,18 +2636,6 @@ size_t MergeTreeData::clearEmptyParts() if (!part->version.getCreationTID().isPrehistoric() && !part->version.isVisible(TransactionLog::instance().getLatestSnapshot())) continue; - /// Don't drop empty parts that cover other parts - /// Otherwise covered parts resurrect - { - auto lock = lockParts(); - if (part->getState() != DataPartState::Active) - continue; - - DataPartsVector covered_parts = getCoveredOutdatedParts(part, lock); - if (!covered_parts.empty()) - continue; - } - parts_names_to_drop.emplace_back(part->name); } } @@ -3774,7 +3771,7 @@ void MergeTreeData::removePartsFromWorkingSet( void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { - removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock); + removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock, /*create_empty_part*/ false); } DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( @@ -3849,7 +3846,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( } MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( - MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock, bool create_empty_part) { #ifndef NDEBUG { @@ -3870,6 +3867,35 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock); + bool is_new_syntax = format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; + if (create_empty_part && !parts_to_remove.empty() && is_new_syntax) + { + /// We are going to remove a lot of parts from zookeeper just after returning from this function. + /// And we will remove parts from disk later (because some queries may use them). + /// But if the server restarts in-between, then it will notice a lot of unexpected parts, + /// so it may refuse to start. Let's create an empty part that covers them. + /// We don't need to commit it to zk, and don't even need to activate it. 
+ + MergeTreePartInfo empty_info = drop_range; + empty_info.level = empty_info.mutation = 0; + for (const auto & part : parts_to_remove) + { + empty_info.level = std::max(empty_info.level, part->info.level); + empty_info.mutation = std::max(empty_info.mutation, part->info.mutation); + } + empty_info.level += 1; + + const auto & partition = parts_to_remove.front()->partition; + String empty_part_name = empty_info.getPartNameAndCheckFormat(format_version); + auto [new_data_part, tmp_dir_holder] = createEmptyPart(empty_info, partition, empty_part_name, NO_TRANSACTION_PTR); + + MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); + renameTempPartAndAdd(new_data_part, transaction, lock); /// All covered parts must be already removed + + /// It will add the empty part to the set of Outdated parts without making it Active (exactly what we need) + transaction.rollback(&lock); + } + /// Since we can return parts in Deleting state, we have to use a wrapper that restricts access to such parts. PartsToRemoveFromZooKeeper parts_to_remove_from_zookeeper; for (auto & part : parts_to_remove) @@ -6225,7 +6251,7 @@ void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part) precommitted_parts.insert(part); } -void MergeTreeData::Transaction::rollback() +void MergeTreeData::Transaction::rollback(DataPartsLock * lock) { if (!isEmpty()) { @@ -6239,7 +6265,8 @@ void MergeTreeData::Transaction::rollback() for (const auto & part : precommitted_parts) part->version.creation_csn.store(Tx::RolledBackCSN); - auto lock = data.lockParts(); + /// It would be much better with TSA... + auto our_lock = (lock) ? DataPartsLock() : data.lockParts(); if (data.data_parts_indexes.empty()) { @@ -6258,7 +6285,7 @@ void MergeTreeData::Transaction::rollback() { data.removePartsFromWorkingSet(txn, DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), - /* clear_without_timeout = */ true, &lock); + /* clear_without_timeout = */ true, &our_lock); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index aab04260b0e..07de33aaf58 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -258,7 +258,7 @@ public: void addPart(MutableDataPartPtr & part); - void rollback(); + void rollback(DataPartsLock * lock = nullptr); /// Immediately remove parts from table's data_parts set and change part /// state to temporary. Useful for new parts which not present in table. @@ -649,7 +649,7 @@ public: /// It includes parts that have been just removed by these method /// and Outdated parts covered by drop_range that were removed earlier for any reason. 
    PartsToRemoveFromZooKeeper removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
-        MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
+        MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock, bool create_empty_part = true);
    /// Restores Outdated part and adds it to working set
    void restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock = nullptr);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 03ebe38e6cf..8d426e1b8b3 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1364,6 +1364,18 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
    paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch);
+    ActiveDataPartSet empty_unexpected_parts_set(format_version);
+    for (const auto & part : parts)
+    {
+        if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name))
+            continue;
+
+        empty_unexpected_parts_set.add(part->name);
+    }
+    if (auto empty_count = empty_unexpected_parts_set.size())
+        LOG_INFO(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): {}",
+            empty_count, fmt::join(empty_unexpected_parts_set.getParts(), ", "));
+
    /** To check the adequacy, for the parts that are in the FS, but not in ZK, we will only consider not the most recent parts.
      * Because unexpected new parts usually arise only because they did not have time to enroll in ZK with a rough restart of the server.
      * It also occurs from deduplicated parts that did not have time to retire.
@@ -1390,6 +1402,15 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
            continue;
        }
+        String covering_empty_part = empty_unexpected_parts_set.getContainingPart(part->name);
+        if (!covering_empty_part.empty())
+        {
+            LOG_WARNING(log, "Unexpected part {} is covered by empty part {}, assuming it has been dropped just before restart",
+                part->name, covering_empty_part);
+            covered_unexpected_parts.push_back(part->name);
+            continue;
+        }
+
        auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(part->info);
        if (MergeTreePartInfo::areAllBlockNumbersCovered(part->info, covered_parts))
diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index ac38c9c97b1..4bf1053a7b6 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -34,6 +34,8 @@ std::string_view getRemovalStateDescription(DB::DataPartRemovalState state)
            return "Part hasn't reached removal time yet";
        case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT:
            return "Waiting mutation parent to be removed";
+        case DB::DataPartRemovalState::EMPTY_PART_COVERS_OTHER_PARTS:
+            return "Waiting for covered parts to be removed first";
        case DB::DataPartRemovalState::REMOVED:
            return "Part was selected to be removed";
    }
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
new file mode 100644
index 00000000000..1f991703c7b
--- /dev/null
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
@@ -0,0 +1,4 @@
+1 rmt
+1 rmt1
+2 rmt
+2 rmt1
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
new file
mode 100644 index 00000000000..fbd90d8ab0f --- /dev/null +++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql @@ -0,0 +1,27 @@ + +create table rmt (n int) engine=ReplicatedMergeTree('/test/02468/{database}', '1') order by tuple() partition by n % 2 settings replicated_max_ratio_of_wrong_parts=0, max_suspicious_broken_parts=0, max_suspicious_broken_parts_bytes=0; +create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02468/{database}', '2') order by tuple() partition by n % 2 settings replicated_max_ratio_of_wrong_parts=0, max_suspicious_broken_parts=0, max_suspicious_broken_parts_bytes=0; + +system stop cleanup rmt; +system stop merges rmt1; + +insert into rmt select * from numbers(10) settings max_block_size=1; + +alter table rmt drop partition id '0'; +truncate table rmt1; + +system sync replica rmt; +system sync replica rmt1; + +detach table rmt sync; +detach table rmt1 sync; + +attach table rmt; +attach table rmt1; + +insert into rmt values (1); +insert into rmt1 values (2); +system sync replica rmt; +system sync replica rmt1; + +select *, _table from merge(currentDatabase(), '') order by (*,), _table; From 93be383117485dfb735214cf52a6d954e5eefc92 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 3 Nov 2023 12:41:00 +0800 Subject: [PATCH 16/80] fix issue https://github.com/ClickHouse/ClickHouse/issues/56285 --- src/IO/ReadHelpers.cpp | 3 +++ src/IO/ReadHelpers.h | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9b9374ff05a..99990893b93 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1154,6 +1154,7 @@ template bool readDateTextFallback(LocalDate &, ReadBuffer &); template ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) { + std::cout << "Stack trace:" << StackTrace().toString() << std::endl; static constexpr bool throw_exception = std::is_same_v; /// YYYY-MM-DD @@ -1262,9 +1263,11 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D datetime = datetime * 10 + *digit_pos - '0'; } datetime *= negative_multiplier; + std::cout << "datetime:" << datetime << ", too_short:" << too_short << std::endl; if (too_short && negative_multiplier != -1) { + std::cout << "parse date time failed:" << std::endl; if constexpr (throw_exception) throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime"); else diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 40f812050db..a1c34f5a124 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -984,20 +984,33 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons template inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) { + static constexpr bool throw_exception = std::is_same_v; + time_t whole = 0; bool is_negative_timestamp = (!buf.eof() && *buf.position() == '-'); bool is_empty = buf.eof(); if (!is_empty) { - try + if constexpr (throw_exception) { - readDateTimeTextImpl(whole, buf, date_lut); + try + { + readDateTimeTextImpl(whole, buf, date_lut); + } + catch (const DB::ParsingException &) + { + if (buf.eof() || *buf.position() != '.') + throw; + } } - catch (const DB::ParsingException & exception) + else { - if (buf.eof() || *buf.position() != '.') - throw exception; + auto ok = readDateTimeTextImpl(whole, buf, date_lut); + if (!ok && (buf.eof() || *buf.position() != 
'.')) + { + return ReturnType(false); + } } } From 3c70b9f65d38e221db5a72f345741f16258680d8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 3 Nov 2023 15:32:37 +0800 Subject: [PATCH 17/80] add uts --- src/IO/ReadHelpers.cpp | 3 --- src/IO/ReadHelpers.h | 2 -- .../0_stateless/02889_datetime64_from_string.reference | 2 ++ tests/queries/0_stateless/02889_datetime64_from_string.sql | 7 ++++++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 99990893b93..9b9374ff05a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1154,7 +1154,6 @@ template bool readDateTextFallback(LocalDate &, ReadBuffer &); template ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) { - std::cout << "Stack trace:" << StackTrace().toString() << std::endl; static constexpr bool throw_exception = std::is_same_v; /// YYYY-MM-DD @@ -1263,11 +1262,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D datetime = datetime * 10 + *digit_pos - '0'; } datetime *= negative_multiplier; - std::cout << "datetime:" << datetime << ", too_short:" << too_short << std::endl; if (too_short && negative_multiplier != -1) { - std::cout << "parse date time failed:" << std::endl; if constexpr (throw_exception) throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime"); else diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index a1c34f5a124..c5a456d70f6 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1008,9 +1008,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re { auto ok = readDateTimeTextImpl(whole, buf, date_lut); if (!ok && (buf.eof() || *buf.position() != '.')) - { return ReturnType(false); - } } } diff --git a/tests/queries/0_stateless/02889_datetime64_from_string.reference b/tests/queries/0_stateless/02889_datetime64_from_string.reference index e6e2208ed4c..825ed2b7ff4 100644 --- a/tests/queries/0_stateless/02889_datetime64_from_string.reference +++ b/tests/queries/0_stateless/02889_datetime64_from_string.reference @@ -1,3 +1,5 @@ 1969-12-31 23:57:57.000 1970-01-01 00:00:23.900 1969-12-31 23:59:36.100 +\N +\N diff --git a/tests/queries/0_stateless/02889_datetime64_from_string.sql b/tests/queries/0_stateless/02889_datetime64_from_string.sql index 50c29de19bd..99ace8a6ea4 100644 --- a/tests/queries/0_stateless/02889_datetime64_from_string.sql +++ b/tests/queries/0_stateless/02889_datetime64_from_string.sql @@ -2,4 +2,9 @@ SELECT toDateTime64('-123', 3, 'UTC'); -- Allowed: no year starts with '-' SELECT toDateTime64('23.9', 3, 'UTC'); -- Allowed: no year has a dot in notation SELECT toDateTime64('-23.9', 3, 'UTC'); -- Allowed -SELECT toDateTime64('1234', 3, 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +SELECT toDateTime64OrNull('0', 3, 'UTC'); +SELECT cast('0' as Nullable(DateTime64(3, 'UTC'))); + +SELECT toDateTime64('1234', 3, 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +SELECT toDateTime64('0', 3, 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +SELECT cast('0' as DateTime64(3, 'UTC')); -- { serverError CANNOT_PARSE_DATETIME } From b50c4c9f3beb474d16d8fd68d267d8bf7bb944f2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Nov 2023 11:24:10 +0100 Subject: [PATCH 18/80] fix --- src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp 
b/src/Storages/MergeTree/MergeTreeData.cpp index 2ce6780f16f..a4500d36df1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3442,8 +3442,6 @@ MergeTreeData::PartHierarchy MergeTreeData::getPartHierarchy( if ((*end)->info == part_info) { result.duplicate_part = *end; - result.covering_parts.clear(); - return result; } if (!part_info.contains((*end)->info)) @@ -3472,10 +3470,11 @@ MergeTreeData::DataPartsVector MergeTreeData::getCoveredOutdatedParts( const DataPartPtr & part, DataPartsLock & data_parts_lock) const { - part->assertState({DataPartState::Active, DataPartState::PreActive}); + part->assertState({DataPartState::Active, DataPartState::PreActive, DataPartState::Outdated}); + bool is_outdated_part = part->getState() == DataPartState::Outdated; PartHierarchy hierarchy = getPartHierarchy(part->info, DataPartState::Outdated, data_parts_lock); - if (hierarchy.duplicate_part) + if (hierarchy.duplicate_part && !is_outdated_part) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected duplicate part {}. It is a bug.", hierarchy.duplicate_part->getNameWithState()); return hierarchy.covered_parts; @@ -3650,6 +3649,10 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( part->name, hierarchy.intersected_parts.back()->getNameWithState(), hierarchy.intersected_parts.size()); } + if (hierarchy.duplicate_part) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected duplicate part {}. It is a bug.", hierarchy.duplicate_part->getNameWithState()); + + if (part->hasLightweightDelete()) has_lightweight_delete_parts.store(true); From 6285de32af952826e23a0f48ee5a7496a6ef1cab Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:53:41 +0100 Subject: [PATCH 19/80] Update src/Core/Settings.h Co-authored-by: Antonio Andelic --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8260c41e626..2b4233537c6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -289,7 +289,7 @@ class IColumn; M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ - M(Bool, storage_metadata_write_full_object_key, false, "Enable write metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ + M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. 
If false, use default value of corresponding columns data type.", IMPORTANT) \ \ From 6e82a309e2eb4842a97571cd0a15953278906dba Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:53:47 +0100 Subject: [PATCH 20/80] Update src/Common/ObjectStorageKey.cpp Co-authored-by: Antonio Andelic --- src/Common/ObjectStorageKey.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ObjectStorageKey.cpp b/src/Common/ObjectStorageKey.cpp index f88e8f9fa5f..27c77f75038 100644 --- a/src/Common/ObjectStorageKey.cpp +++ b/src/Common/ObjectStorageKey.cpp @@ -62,7 +62,7 @@ ObjectStorageKey ObjectStorageKey::createAsAbsolute(String key_) { ObjectStorageKey object_key; object_key.key = std::move(key_); - object_key.is_relative = true; + object_key.is_relative = false; return object_key; } } From bab867f383accc4d7843d007e4504670dcd87958 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:54:10 +0100 Subject: [PATCH 21/80] Update src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp Co-authored-by: Antonio Andelic --- src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 5ce6a99c4fd..082c518d7ba 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -97,7 +97,7 @@ void DiskObjectStorageMetadata::deserializeFromString(const String & data) void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const { /// There are the changes for backward compatibility - /// No new file should be write as VERSION_FULL_OBJECT_KEY until storage_metadata_write_full_object_key feature is enabled + /// No new file should be written as VERSION_FULL_OBJECT_KEY until storage_metadata_write_full_object_key feature is enabled /// However, in case of rollback, once file had been written as VERSION_FULL_OBJECT_KEY /// it has to be always rewritten as VERSION_FULL_OBJECT_KEY From 890cc8bbca653c2549910d2b2c0d3a00089eef57 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:54:40 +0100 Subject: [PATCH 22/80] Update src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp Co-authored-by: Antonio Andelic --- src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 082c518d7ba..9e501cc9342 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -96,7 +96,7 @@ void DiskObjectStorageMetadata::deserializeFromString(const String & data) void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const { - /// There are the changes for backward compatibility + /// These are the changes for backward compatibility /// No new file should be written as VERSION_FULL_OBJECT_KEY until storage_metadata_write_full_object_key feature is enabled /// However, in case of rollback, once file had been written as VERSION_FULL_OBJECT_KEY /// it has to be always rewritten as VERSION_FULL_OBJECT_KEY From a108f9f764fa55ace1348504ae8e743dcf02cf78 Mon Sep 17 00:00:00 2001 From: Sema Checherinda 
<104093494+CheSema@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:54:48 +0100 Subject: [PATCH 23/80] Update src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp Co-authored-by: Antonio Andelic --- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 8bcb2cf06bb..fa3ed399072 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -670,7 +670,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile if (metadata_helper) { if (!object_key.hasPrefix()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "metadata helper is not supported with abs paths"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "metadata helper is not supported with absolute paths"); auto revision = metadata_helper->revision_counter + 1; metadata_helper->revision_counter++; From 3e8ad144232bf307e332e5c5258753b84d681d95 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 3 Nov 2023 16:07:05 +0100 Subject: [PATCH 24/80] Implement digest helpers for different objects --- tests/ci/ccache_utils.py | 4 +-- tests/ci/digest_helper.py | 63 +++++++++++++++++++++++++++++++++++++++ tests/ci/s3_helper.py | 10 ------- 3 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 tests/ci/digest_helper.py diff --git a/tests/ci/ccache_utils.py b/tests/ci/ccache_utils.py index 75a026d2524..6ccaa8c80e0 100644 --- a/tests/ci/ccache_utils.py +++ b/tests/ci/ccache_utils.py @@ -3,13 +3,13 @@ import logging import os import shutil -from hashlib import md5 from pathlib import Path import requests # type: ignore from build_download_helper import download_build_with_progress, DownloadException from compress_files import decompress_fast, compress_fast +from digest_helper import digest_path from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET from git_helper import git_runner from s3_helper import S3Helper @@ -108,7 +108,7 @@ class CargoCache: s3_helper: S3Helper, ): self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock" - self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest() + self.lock_hash = digest_path(self._cargo_lock_file).hexdigest() self.directory = directory self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst" self.temp_path = temp_path diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py new file mode 100644 index 00000000000..a97c541cc65 --- /dev/null +++ b/tests/ci/digest_helper.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +from hashlib import md5 +from logging import getLogger +from pathlib import Path +from typing import TYPE_CHECKING +from sys import modules + +if TYPE_CHECKING: + from hashlib import ( # pylint:disable=no-name-in-module,ungrouped-imports + _Hash as HASH, + ) +else: + HASH = "_Hash" + +logger = getLogger(__name__) + + +def _digest_file(file: Path) -> HASH: + assert file.is_file() + md5_hash = md5() + with open(file, "rb") as fd: + for chunk in iter(lambda: fd.read(4096), b""): + md5_hash.update(chunk) + return md5_hash + + +def _digest_directory(directory: Path) -> HASH: + assert directory.is_dir() + md5_hash = md5() + for p in sorted(directory.rglob("*")): + if p.is_symlink() and p.is_dir(): + # The symlink directory is not listed recursively, so we process it manually + md5_hash.update(_digest_directory(p).digest()) + if p.is_file(): + 
md5_hash.update(_digest_file(p).digest()) + return md5_hash + + +def digest_path(path: Path) -> HASH: + """Calculates md5 hash of the path, either it's directory or file""" + if path.is_dir(): + return _digest_directory(path) + if path.is_file(): + return _digest_file(path) + return md5() + + +def digest_script(path_str: str) -> HASH: + """Accepts value of the __file__ executed script and calculates the md5 hash for it""" + path = Path(path_str) + parent = path.parent + md5_hash = md5() + try: + for script in modules.values(): + script_path = getattr(script, "__file__", "") + if parent.absolute().as_posix() in script_path: + logger.debug("Updating the hash with %s", script_path) + md5_hash.update(_digest_file(Path(script_path)).digest()) + except RuntimeError: + logger.warning("The modules size has changed, retry calculating digest") + return digest_script(path_str) + return md5_hash diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index bb047b4f4ef..f94f7f60bb6 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import hashlib import logging import re import shutil @@ -22,15 +21,6 @@ from env_helper import ( from compress_files import compress_file_fast -def _md5(fname): - hash_md5 = hashlib.md5() - with open(fname, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_md5.update(chunk) - logging.debug("MD5 for %s is %s", fname, hash_md5.hexdigest()) - return hash_md5.hexdigest() - - def _flatten_list(lst): result = [] for elem in lst: From f7c5602da45d14c17063a176be5db9da6be8bd89 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 3 Nov 2023 16:30:14 +0100 Subject: [PATCH 25/80] Add digest_paths --- tests/ci/digest_helper.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index a97c541cc65..21febeafc57 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -3,7 +3,7 @@ from hashlib import md5 from logging import getLogger from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Iterable from sys import modules if TYPE_CHECKING: @@ -46,6 +46,15 @@ def digest_path(path: Path) -> HASH: return md5() +def digest_paths(paths: Iterable[Path]) -> HASH: + """Calculates aggregated md5 hash of passed paths. The order matters""" + md5_hash = md5() + for path in paths: + if path.exists(): + md5_hash.update(digest_path(path).digest()) + return md5_hash + + def digest_script(path_str: str) -> HASH: """Accepts value of the __file__ executed script and calculates the md5 hash for it""" path = Path(path_str) From e352e7bfba92d2dd209d5aa7fde516316abadb0d Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 3 Nov 2023 17:41:53 +0100 Subject: [PATCH 26/80] Change digest API to update the single hash object --- tests/ci/digest_helper.py | 43 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index 21febeafc57..69a62fa62b5 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -3,7 +3,7 @@ from hashlib import md5 from logging import getLogger from pathlib import Path -from typing import TYPE_CHECKING, Iterable +from typing import TYPE_CHECKING, Iterable, Optional from sys import modules if TYPE_CHECKING: @@ -16,43 +16,42 @@ else: logger = getLogger(__name__) -def _digest_file(file: Path) -> HASH: +def _digest_file(file: Path, hash_object: HASH) -> None: assert file.is_file() - md5_hash = md5() with open(file, "rb") as fd: for chunk in iter(lambda: fd.read(4096), b""): - md5_hash.update(chunk) - return md5_hash + hash_object.update(chunk) -def _digest_directory(directory: Path) -> HASH: +def _digest_directory(directory: Path, hash_object: HASH) -> None: assert directory.is_dir() - md5_hash = md5() for p in sorted(directory.rglob("*")): if p.is_symlink() and p.is_dir(): # The symlink directory is not listed recursively, so we process it manually - md5_hash.update(_digest_directory(p).digest()) + (_digest_directory(p, hash_object)) if p.is_file(): - md5_hash.update(_digest_file(p).digest()) - return md5_hash + (_digest_file(p, hash_object)) -def digest_path(path: Path) -> HASH: - """Calculates md5 hash of the path, either it's directory or file""" +def digest_path(path: Path, hash_object: Optional[HASH] = None) -> HASH: + """Calculates md5 (or updates existing hash_object) hash of the path, either it's + directory or file""" + hash_object = hash_object or md5() if path.is_dir(): - return _digest_directory(path) - if path.is_file(): - return _digest_file(path) - return md5() + _digest_directory(path, hash_object) + elif path.is_file(): + _digest_file(path, hash_object) + return hash_object -def digest_paths(paths: Iterable[Path]) -> HASH: - """Calculates aggregated md5 hash of passed paths. The order matters""" - md5_hash = md5() +def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> HASH: + """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths. + The order matters""" + hash_object = hash_object or md5() for path in paths: if path.exists(): - md5_hash.update(digest_path(path).digest()) - return md5_hash + digest_path(path, hash_object) + return hash_object def digest_script(path_str: str) -> HASH: @@ -65,7 +64,7 @@ def digest_script(path_str: str) -> HASH: script_path = getattr(script, "__file__", "") if parent.absolute().as_posix() in script_path: logger.debug("Updating the hash with %s", script_path) - md5_hash.update(_digest_file(Path(script_path)).digest()) + _digest_file(Path(script_path), md5_hash) except RuntimeError: logger.warning("The modules size has changed, retry calculating digest") return digest_script(path_str) From 40c2329fc315ff517637d18f31eabe9c1e27bdf7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Nov 2023 18:37:45 +0000 Subject: [PATCH 27/80] Fix more tests with analyzer. 
--- tests/analyzer_tech_debt.txt | 2 -- tests/queries/0_stateless/02341_global_join_cte.reference | 7 ++++++- tests/queries/0_stateless/02341_global_join_cte.sql | 3 ++- .../02713_array_low_cardinality_string.reference | 6 +++--- .../0_stateless/02713_array_low_cardinality_string.sql | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index ff93d2f4e30..7d662f90df8 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -34,7 +34,6 @@ 02139_MV_with_scalar_subquery 02174_cte_scalar_cache_mv 02302_s3_file_pruning -02341_global_join_cte 02345_implicit_transaction 02352_grouby_shadows_arg 02354_annoy @@ -50,7 +49,6 @@ 02521_aggregation_by_partitions 02554_fix_grouping_sets_predicate_push_down 02575_merge_prewhere_different_default_kind -02713_array_low_cardinality_string 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET 01009_global_array_join_names 00917_multiple_joins_denny_crane diff --git a/tests/queries/0_stateless/02341_global_join_cte.reference b/tests/queries/0_stateless/02341_global_join_cte.reference index 8b3cd68232a..f2cfe994ffa 100644 --- a/tests/queries/0_stateless/02341_global_join_cte.reference +++ b/tests/queries/0_stateless/02341_global_join_cte.reference @@ -1,5 +1,10 @@ -- { echo } -with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings allow_experimental_analyzer=0; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings allow_experimental_analyzer=1; -- It works with analyzer; rhs is an alias itself. 
+0 +0 +0 +0 with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; 0 0 diff --git a/tests/queries/0_stateless/02341_global_join_cte.sql b/tests/queries/0_stateless/02341_global_join_cte.sql index b77e5b0b688..b9b906afd70 100644 --- a/tests/queries/0_stateless/02341_global_join_cte.sql +++ b/tests/queries/0_stateless/02341_global_join_cte.sql @@ -1,4 +1,5 @@ -- { echo } -with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings allow_experimental_analyzer=0; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings allow_experimental_analyzer=1; -- It works with analyzer; rhs is an alias itself. with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; with rhs_ as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs_ rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; diff --git a/tests/queries/0_stateless/02713_array_low_cardinality_string.reference b/tests/queries/0_stateless/02713_array_low_cardinality_string.reference index aea0fd62732..f444d1d7e58 100644 --- a/tests/queries/0_stateless/02713_array_low_cardinality_string.reference +++ b/tests/queries/0_stateless/02713_array_low_cardinality_string.reference @@ -1,9 +1,9 @@ --- tab idx bloom_filter --- -Expression ((Projection + Before ORDER BY)) - Filter (WHERE) - ReadFromMergeTree (default.tab) +Expression + Filter + ReadFromMergeTree Indexes: Skip Name: idx diff --git a/tests/queries/0_stateless/02713_array_low_cardinality_string.sql b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql index 4ecd3bf17c1..c55d57f04e7 100644 --- a/tests/queries/0_stateless/02713_array_low_cardinality_string.sql +++ b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql @@ -18,6 +18,6 @@ WHERE database = currentDatabase() AND table = 'tab'; SELECT '---'; -EXPLAIN indexes = 1 SELECT * FROM tab WHERE has(foo, 'b'); +EXPLAIN indexes = 1, description=0 SELECT * FROM tab WHERE has(foo, 'b'); DROP TABLE tab; From 289fcccbfc2cb456810361229881ddfcdad1efdd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Nov 2023 20:19:24 +0100 Subject: [PATCH 28/80] change some exception codes --- src/Compression/CompressionCodecMultiple.cpp | 6 +++--- src/Compression/ICompressionCodec.cpp | 7 +++---- src/IO/HTTPChunkedReadBuffer.cpp | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff 
--git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index dba67749e4d..5f2303a282a 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { - extern const int CORRUPTED_DATA; + extern const int CANNOT_DECOMPRESS; } CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs_) @@ -79,7 +79,7 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const { if (source_size < 1 || !source[0]) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Wrong compression methods list"); + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Wrong compression methods list"); UInt8 compression_methods_size = source[0]; @@ -98,7 +98,7 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour UInt32 uncompressed_size = ICompressionCodec::readDecompressedBlockSize(compressed_buf.data()); if (idx == 0 && uncompressed_size != decompressed_size) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Wrong final decompressed size in codec Multiple, got {}, expected {}", + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Wrong final decompressed size in codec Multiple, got {}, expected {}", uncompressed_size, decompressed_size); uncompressed_buf.resize(uncompressed_size + additional_size_at_the_end_of_buffer); diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index a8257c4331f..1a4ee1a61a1 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -16,7 +16,6 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_DECOMPRESS; - extern const int CORRUPTED_DATA; extern const int LOGICAL_ERROR; } @@ -97,7 +96,7 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch UInt8 header_size = getHeaderSize(); if (source_size < header_size) - throw Exception(ErrorCodes::CORRUPTED_DATA, + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data: the compressed data size ({}, this should include header size) " "is less than the header size ({})", source_size, static_cast(header_size)); @@ -116,7 +115,7 @@ UInt32 ICompressionCodec::readCompressedBlockSize(const char * source) { UInt32 compressed_block_size = unalignedLoadLittleEndian(&source[1]); if (compressed_block_size == 0) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: header is corrupt with compressed block size 0"); + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data: header is corrupt with compressed block size 0"); return compressed_block_size; } @@ -125,7 +124,7 @@ UInt32 ICompressionCodec::readDecompressedBlockSize(const char * source) { UInt32 decompressed_block_size = unalignedLoadLittleEndian(&source[5]); if (decompressed_block_size == 0) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: header is corrupt with decompressed block size 0"); + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data: header is corrupt with decompressed block size 0"); return decompressed_block_size; } diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index 29034b35e16..41788fa8ce7 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes { extern const int 
ARGUMENT_OUT_OF_BOUND; extern const int UNEXPECTED_END_OF_FILE; - extern const int CORRUPTED_DATA; + extern const int BAD_REQUEST_PARAMETER; } size_t HTTPChunkedReadBuffer::readChunkHeader() @@ -22,7 +22,7 @@ size_t HTTPChunkedReadBuffer::readChunkHeader() throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of file while reading chunk header of HTTP chunked data"); if (!isHexDigit(*in->position())) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Unexpected data instead of HTTP chunk header"); + throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Unexpected data instead of HTTP chunk header"); size_t res = 0; do From a44e27b0dc0bc18004f4d8d9be8794d7635022d3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Nov 2023 22:28:10 +0100 Subject: [PATCH 29/80] fix --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++++++ .../queries/0_stateless/01660_system_parts_smoke.reference | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a4500d36df1..627d59caef1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3461,6 +3461,9 @@ MergeTreeData::PartHierarchy MergeTreeData::getPartHierarchy( ++end; } + if (begin != committed_parts_range.end() && (*begin)->info == part_info) + ++begin; + result.covered_parts.insert(result.covered_parts.end(), begin, end); return result; @@ -3881,8 +3884,11 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW MergeTreePartInfo empty_info = drop_range; empty_info.level = empty_info.mutation = 0; + if (!empty_info.min_block) + empty_info.min_block = MergeTreePartInfo::MAX_BLOCK_NUMBER; for (const auto & part : parts_to_remove) { + empty_info.min_block = std::min(empty_info.min_block, part->info.min_block); empty_info.level = std::max(empty_info.level, part->info.level); empty_info.mutation = std::max(empty_info.mutation, part->info.mutation); } @@ -3897,6 +3903,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW /// It will add the empty part to the set of Outdated parts without making it Active (exactly what we need) transaction.rollback(&lock); + new_data_part->remove_time.store(0, std::memory_order_relaxed); } /// Since we can return parts in Deleting state, we have to use a wrapper that restricts access to such parts. 
diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.reference b/tests/queries/0_stateless/01660_system_parts_smoke.reference index b38d699c2b9..3c134f02d0b 100644 --- a/tests/queries/0_stateless/01660_system_parts_smoke.reference +++ b/tests/queries/0_stateless/01660_system_parts_smoke.reference @@ -9,6 +9,5 @@ all_2_2_0 1 1 Active 2 Outdated # truncate -HAVE PARTS Active HAVE PARTS Outdated # drop From 8958861f99697a9efe0b59fe1b4479c967a543c7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 3 Nov 2023 22:43:40 +0100 Subject: [PATCH 30/80] review notes --- src/Common/ObjectStorageKey.cpp | 2 +- src/Common/ObjectStorageKey.h | 2 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 2 +- .../DiskObjectStorageTransaction.cpp | 14 +++++++------- src/Disks/ObjectStorages/Web/WebObjectStorage.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Common/ObjectStorageKey.cpp b/src/Common/ObjectStorageKey.cpp index 27c77f75038..ca5617c8aa2 100644 --- a/src/Common/ObjectStorageKey.cpp +++ b/src/Common/ObjectStorageKey.cpp @@ -34,7 +34,7 @@ const String & ObjectStorageKey::serialize() const return key; } -ObjectStorageKey ObjectStorageKey::createAsRelativeAnyway(String key_) +ObjectStorageKey ObjectStorageKey::createAsRelative(String key_) { ObjectStorageKey object_key; object_key.suffix = std::move(key_); diff --git a/src/Common/ObjectStorageKey.h b/src/Common/ObjectStorageKey.h index e10f6a2382e..7e509b741e4 100644 --- a/src/Common/ObjectStorageKey.h +++ b/src/Common/ObjectStorageKey.h @@ -16,7 +16,7 @@ namespace DB const String & serialize() const; static ObjectStorageKey createAsRelative(String prefix_, String suffix_); - static ObjectStorageKey createAsRelativeAnyway(String key_); + static ObjectStorageKey createAsRelative(String key_); static ObjectStorageKey createAsAbsolute(String key_); private: diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 400ad4a1678..fcb82daca95 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -104,7 +104,7 @@ AzureObjectStorage::AzureObjectStorage( ObjectStorageKey AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { - return ObjectStorageKey::createAsRelativeAnyway(getRandomASCIIString(32)); + return ObjectStorageKey::createAsRelative(getRandomASCIIString(32)); } bool AzureObjectStorage::exists(const StoredObject & object) const diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index fa3ed399072..25de89a9548 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -689,7 +689,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile if (autocommit) { - create_metadata_callback = [tx = shared_from_this(), mode, path, object_key](size_t count) + create_metadata_callback = [tx = shared_from_this(), mode, path, key_ = std::move(object_key)](size_t count) { if (mode == WriteMode::Rewrite) { @@ -698,10 +698,10 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile if (!tx->object_storage.isWriteOnce() && tx->metadata_storage.exists(path)) tx->object_storage.removeObjectsIfExist(tx->metadata_storage.getStorageObjects(path)); - tx->metadata_transaction->createMetadataFile(path, object_key, count); + 
tx->metadata_transaction->createMetadataFile(path, key_, count); } else - tx->metadata_transaction->addBlobToMetadata(path, object_key, count); + tx->metadata_transaction->addBlobToMetadata(path, key_, count); tx->metadata_transaction->commit(); }; @@ -710,7 +710,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile { auto write_operation = std::make_unique(object_storage, metadata_storage, object); - create_metadata_callback = [object_storage_tx = shared_from_this(), write_op = write_operation.get(), mode, path, object_key](size_t count) + create_metadata_callback = [object_storage_tx = shared_from_this(), write_op = write_operation.get(), mode, path, key_ = std::move(object_key)](size_t count) { /// This callback called in WriteBuffer finalize method -- only there we actually know /// how many bytes were written. We don't control when this finalize method will be called @@ -722,7 +722,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile /// ... /// buf1->finalize() // shouldn't do anything with metadata operations, just memoize what to do /// tx->commit() - write_op->setOnExecute([object_storage_tx, mode, path, object_key, count](MetadataTransactionPtr tx) + write_op->setOnExecute([object_storage_tx, mode, path, key_, count](MetadataTransactionPtr tx) { if (mode == WriteMode::Rewrite) { @@ -734,10 +734,10 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile object_storage_tx->metadata_storage.getStorageObjects(path)); } - tx->createMetadataFile(path, object_key, count); + tx->createMetadataFile(path, key_, count); } else - tx->addBlobToMetadata(path, object_key, count); + tx->addBlobToMetadata(path, key_, count); }); }; diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index 4bd96825818..cadc369a0ec 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -91,7 +91,7 @@ public: ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override { - return ObjectStorageKey::createAsRelativeAnyway(path); + return ObjectStorageKey::createAsRelative(path); } bool isRemote() const override { return true; } From 0b0b1b21c689a0f4290d3faefed0eee2596541d7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 4 Nov 2023 14:18:49 +0100 Subject: [PATCH 31/80] different error codes for internal and external data --- src/Compression/CompressedReadBuffer.h | 4 +-- src/Compression/CompressedReadBufferBase.cpp | 30 +++++++++++++------- src/Compression/CompressedReadBufferBase.h | 5 +++- src/Compression/CompressionCodecMultiple.cpp | 12 ++------ src/Compression/ICompressionCodec.cpp | 13 ++++----- src/Compression/ICompressionCodec.h | 17 +++++++++-- 6 files changed, 49 insertions(+), 32 deletions(-) diff --git a/src/Compression/CompressedReadBuffer.h b/src/Compression/CompressedReadBuffer.h index 0c537d171c4..bbbea2e967e 100644 --- a/src/Compression/CompressedReadBuffer.h +++ b/src/Compression/CompressedReadBuffer.h @@ -16,8 +16,8 @@ private: bool nextImpl() override; public: - explicit CompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false) - : CompressedReadBufferBase(&in_, allow_different_codecs_), BufferWithOwnMemory(0) + explicit CompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false, bool external_data_ = false) + : CompressedReadBufferBase(&in_, allow_different_codecs_, external_data_), BufferWithOwnMemory(0) { } diff --git a/src/Compression/CompressedReadBufferBase.cpp 
b/src/Compression/CompressedReadBufferBase.cpp index dd19955d010..e416fadc829 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -114,7 +114,8 @@ static void readHeaderAndGetCodecAndSize( CompressionCodecPtr & codec, size_t & size_decompressed, size_t & size_compressed_without_checksum, - bool allow_different_codecs) + bool allow_different_codecs, + bool external_data) { uint8_t method = ICompressionCodec::readMethod(compressed_buffer); @@ -136,8 +137,11 @@ static void readHeaderAndGetCodecAndSize( } } - size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_buffer); - size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_buffer); + if (external_data) + codec->setExternalDataFlag(); + + size_compressed_without_checksum = codec->readCompressedBlockSize(compressed_buffer); + size_decompressed = codec->readDecompressedBlockSize(compressed_buffer); /// This is for clang static analyzer. assert(size_decompressed > 0); @@ -170,7 +174,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, codec, size_decompressed, size_compressed_without_checksum, - allow_different_codecs); + allow_different_codecs, + external_data); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -221,7 +226,8 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t & codec, size_decompressed, size_compressed_without_checksum, - allow_different_codecs); + allow_different_codecs, + external_data); auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -254,7 +260,8 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t & } } -static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs) +static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, + bool allow_different_codecs, bool external_data) { ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks); ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed); @@ -278,17 +285,20 @@ static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_de getHexUIntLowercase(method), getHexUIntLowercase(codec->getMethodByte())); } } + + if (external_data) + codec->setExternalDataFlag(); } void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) { - readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs); + readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs, external_data); codec->decompress(compressed_buffer, static_cast(size_compressed_without_checksum), to); } void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum) { - readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs); + readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs, external_data); if (codec->isNone()) { @@ -320,8 +330,8 @@ void CompressedReadBufferBase::setDecompressMode(ICompressionCodec::CodecMode mo } /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. 
-CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_) - : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_) +CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_, bool external_data_) + : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_), external_data(external_data_) { } diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index baea4d2b855..0a995f012fd 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -30,6 +30,9 @@ protected: /// Allow reading data, compressed by different codecs from one file. bool allow_different_codecs; + /// Report decompression errors as CANNOT_DECOMPRESS, not CORRUPTED_DATA + bool external_data; + /// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. /// /// If always_copy is true then even if the compressed block is already stored in compressed_in.buffer() @@ -67,7 +70,7 @@ protected: public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. - explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false); + explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false, bool external_data_ = false); virtual ~CompressedReadBufferBase(); /** Disable checksums. diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 5f2303a282a..b1eb7fb50c3 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -14,12 +14,6 @@ namespace DB { - -namespace ErrorCodes -{ - extern const int CANNOT_DECOMPRESS; -} - CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs_) : codecs(codecs_) { @@ -79,7 +73,7 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const { if (source_size < 1 || !source[0]) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Wrong compression methods list"); + throw Exception(decompression_error_code, "Wrong compression methods list"); UInt8 compression_methods_size = source[0]; @@ -95,10 +89,10 @@ void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 sour auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); compressed_buf.resize(compressed_buf.size() + additional_size_at_the_end_of_buffer); - UInt32 uncompressed_size = ICompressionCodec::readDecompressedBlockSize(compressed_buf.data()); + UInt32 uncompressed_size = readDecompressedBlockSize(compressed_buf.data()); if (idx == 0 && uncompressed_size != decompressed_size) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Wrong final decompressed size in codec Multiple, got {}, expected {}", + throw Exception(decompression_error_code, "Wrong final decompressed size in codec Multiple, got {}, expected {}", uncompressed_size, decompressed_size); uncompressed_buf.resize(uncompressed_size + additional_size_at_the_end_of_buffer); diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index 1a4ee1a61a1..b4cd6864030 100644 --- a/src/Compression/ICompressionCodec.cpp +++ 
b/src/Compression/ICompressionCodec.cpp @@ -15,7 +15,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_DECOMPRESS; extern const int LOGICAL_ERROR; } @@ -96,14 +95,14 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch UInt8 header_size = getHeaderSize(); if (source_size < header_size) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, + throw Exception(decompression_error_code, "Can't decompress data: the compressed data size ({}, this should include header size) " "is less than the header size ({})", source_size, static_cast(header_size)); uint8_t our_method = getMethodByte(); uint8_t method = source[0]; if (method != our_method) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data with codec byte {} using codec with byte {}", method, our_method); + throw Exception(decompression_error_code, "Can't decompress data with codec byte {} using codec with byte {}", method, our_method); UInt32 decompressed_size = readDecompressedBlockSize(source); doDecompressData(&source[header_size], source_size - header_size, dest, decompressed_size); @@ -111,20 +110,20 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch return decompressed_size; } -UInt32 ICompressionCodec::readCompressedBlockSize(const char * source) +UInt32 ICompressionCodec::readCompressedBlockSize(const char * source) const { UInt32 compressed_block_size = unalignedLoadLittleEndian(&source[1]); if (compressed_block_size == 0) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data: header is corrupt with compressed block size 0"); + throw Exception(decompression_error_code, "Can't decompress data: header is corrupt with compressed block size 0"); return compressed_block_size; } -UInt32 ICompressionCodec::readDecompressedBlockSize(const char * source) +UInt32 ICompressionCodec::readDecompressedBlockSize(const char * source) const { UInt32 decompressed_block_size = unalignedLoadLittleEndian(&source[5]); if (decompressed_block_size == 0) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data: header is corrupt with decompressed block size 0"); + throw Exception(decompression_error_code, "Can't decompress data: header is corrupt with decompressed block size 0"); return decompressed_block_size; } diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index 6630838fa64..ca794511268 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -13,6 +13,12 @@ namespace DB extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); +namespace ErrorCodes +{ + extern const int CANNOT_DECOMPRESS; + extern const int CORRUPTED_DATA; +} + /** * Represents interface for compression codecs like LZ4, ZSTD, etc. */ @@ -59,7 +65,10 @@ public: CodecMode getDecompressMode() const{ return decompressMode; } /// if set mode to CodecMode::Asynchronous, must be followed with flushAsynchronousDecompressRequests - void setDecompressMode(CodecMode mode){ decompressMode = mode; } + void setDecompressMode(CodecMode mode) { decompressMode = mode; } + + /// Report decompression errors as CANNOT_DECOMPRESS, not CORRUPTED_DATA + void setExternalDataFlag() { decompression_error_code = ErrorCodes::CANNOT_DECOMPRESS; } /// Flush result for previous asynchronous decompression requests. /// This function must be called following several requests offload to HW. 
@@ -82,10 +91,10 @@ public: static constexpr UInt8 getHeaderSize() { return COMPRESSED_BLOCK_HEADER_SIZE; } /// Read size of compressed block from compressed source - static UInt32 readCompressedBlockSize(const char * source); + UInt32 readCompressedBlockSize(const char * source) const; /// Read size of decompressed block from compressed source - static UInt32 readDecompressedBlockSize(const char * source); + UInt32 readDecompressedBlockSize(const char * source) const; /// Read method byte from compressed source static uint8_t readMethod(const char * source); @@ -131,6 +140,8 @@ protected: /// Construct and set codec description from codec name and arguments. Must be called in codec constructor. void setCodecDescription(const String & name, const ASTs & arguments = {}); + int decompression_error_code = ErrorCodes::CORRUPTED_DATA; + private: ASTPtr full_codec_desc; CodecMode decompressMode{CodecMode::Synchronous}; From 9acd3707cb4b6f9c88aa9e7f0d77965104236b15 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 4 Nov 2023 15:22:18 +0100 Subject: [PATCH 32/80] fix tests --- src/Storages/MergeTree/MergeTreeData.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 11 ++++++++++- .../test_attach_without_fetching/test.py | 2 +- .../integration/test_parts_delete_zookeeper/test.py | 5 ++++- .../test.py | 13 +++---------- .../queries/0_stateless/01825_type_json_1.reference | 6 +----- 6 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 627d59caef1..c22bd82b69a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3904,6 +3904,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW /// It will add the empty part to the set of Outdated parts without making it Active (exactly what we need) transaction.rollback(&lock); new_data_part->remove_time.store(0, std::memory_order_relaxed); + new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; } /// Since we can return parts in Deleting state, we have to use a wrapper that restricts access to such parts. 
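(Illustrative aside, not part of any patch in this series: the external_data_ flag introduced by PATCH 31 above can be exercised roughly as sketched below. The CompressedReadBuffer constructor signature is taken from the CompressedReadBuffer.h hunk in that patch; the function name and surrounding code are hypothetical and assume the ClickHouse source tree.)

#include <string>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Compression/CompressedReadBuffer.h>

/// Sketch: decompressing bytes received from a client rather than read from
/// ClickHouse's own storage. With external_data_ = true, a malformed block is
/// reported as CANNOT_DECOMPRESS (bad input) instead of CORRUPTED_DATA
/// (which would wrongly suggest server-side data corruption).
std::string decompressExternalBlock(const std::string & user_supplied_bytes)
{
    DB::ReadBufferFromString raw_in(user_supplied_bytes);
    DB::CompressedReadBuffer in(raw_in, /*allow_different_codecs_*/ false, /*external_data_*/ true);

    std::string decompressed;
    DB::readStringUntilEOF(decompressed, in);
    return decompressed;
}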
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8d426e1b8b3..c627d01809b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1365,12 +1365,13 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); ActiveDataPartSet empty_unexpected_parts_set(format_version); - for (const auto & part : parts) + for (auto & part : parts) { if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name)) continue; empty_unexpected_parts_set.add(part->name); + const_cast(*part).remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; } if (auto empty_count = empty_unexpected_parts_set.size()) LOG_INFO(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): {}", @@ -9115,6 +9116,14 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co } } + if (part.rows_count == 0 && part.remove_tmp_policy == IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS) + { + /// It's a non-replicated empty part that was created to avoid unexpected parts after DROP_RANGE + LOG_INFO(log, "Looks like {} is a non-replicated empty part that was created to avoid unexpected parts after DROP_RANGE, " + "blobs can be removed", part.name); + return std::make_pair(true, NameSet{}); + } + if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper) { if (zookeeper->exists(zookeeper_path)) diff --git a/tests/integration/test_attach_without_fetching/test.py b/tests/integration/test_attach_without_fetching/test.py index 60500380b31..52c85166742 100644 --- a/tests/integration/test_attach_without_fetching/test.py +++ b/tests/integration/test_attach_without_fetching/test.py @@ -13,7 +13,7 @@ def fill_node(node): """ CREATE TABLE IF NOT EXISTS test(n UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}') - ORDER BY n PARTITION BY n % 10; + ORDER BY n PARTITION BY n % 10 SETTINGS cleanup_delay_period=1, max_cleanup_delay_period=3; """.format( replica=node.name ) diff --git a/tests/integration/test_parts_delete_zookeeper/test.py b/tests/integration/test_parts_delete_zookeeper/test.py index 9fd07e7b65d..da55162dc72 100644 --- a/tests/integration/test_parts_delete_zookeeper/test.py +++ b/tests/integration/test_parts_delete_zookeeper/test.py @@ -61,8 +61,11 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): node1.query("TRUNCATE TABLE test_table") + total_parts = node1.query("SELECT count(*) from system.parts where table = 'test_table'") + assert total_parts == "0\n" or total_parts == "1\n" + assert ( - node1.query("SELECT count(*) from system.parts where table = 'test_table'") + node1.query("SELECT count(*) from system.parts where table = 'test_table' and active = 1") == "0\n" ) diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/test.py b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py index 1b68aac08a7..4839919e23d 100644 --- a/tests/integration/test_replicated_zero_copy_projection_mutation/test.py +++ b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py @@ -174,20 +174,13 @@ def test_hardlinks_preserved_when_projection_dropped( ) ENGINE ReplicatedMergeTree('/clickhouse/tables/test_projection', '{instance}') ORDER BY a + SETTINGS 
cleanup_delay_period=1, max_cleanup_delay_period=3 """ - first_node_settings = """ - SETTINGS - storage_policy='s3', - old_parts_lifetime=0 - """ + first_node_settings = ", storage_policy='s3', old_parts_lifetime=0" # big old_parts_lifetime value makes second node to hold outdated part for us, we make it as broken_on_start - second_node_settings = """ - SETTINGS - storage_policy='s3', - old_parts_lifetime=10000 - """ + second_node_settings = ", storage_policy='s3', old_parts_lifetime=10000" first_cluster_node.query(create_query + first_node_settings) second_cluster_node.query(create_query + second_node_settings) diff --git a/tests/queries/0_stateless/01825_type_json_1.reference b/tests/queries/0_stateless/01825_type_json_1.reference index 3f0eaf3854a..3526e80d3d7 100644 --- a/tests/queries/0_stateless/01825_type_json_1.reference +++ b/tests/queries/0_stateless/01825_type_json_1.reference @@ -6,26 +6,22 @@ all_2_2_0 data Tuple(k5 String) all_1_2_1 data Tuple(k1 String, k2 Tuple(k3 String, k4 String), k5 String) ============ 1 ['aaa','ddd'] [['bbb','ccc'],['eee','fff']] -all_1_2_2 data Tuple(_dummy UInt8) all_3_3_0 data Tuple(k1 Nested(k2 String, k3 Nested(k4 String))) ============ 1 a 42 2 b 4200 4242 -all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) 1 a 42 2 b 4200 3 a 42.123 -all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) all_5_5_0 data Tuple(name String, value Float64) 1 a 42 2 b 4200 3 a 42.123 4 a some -all_1_2_3 data Tuple(_dummy UInt8) all_4_4_0 data Tuple(name String, value Int16) all_5_5_0 data Tuple(name String, value Float64) all_6_6_0 data Tuple(name String, value String) -all_1_6_4 data Tuple(name String, value String) +all_4_6_4 data Tuple(name String, value String) From 6ad8e789f6586d20fc91f545342f12146ec778b0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 4 Nov 2023 14:56:15 +0000 Subject: [PATCH 33/80] Automatic style fix --- tests/integration/test_parts_delete_zookeeper/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_parts_delete_zookeeper/test.py b/tests/integration/test_parts_delete_zookeeper/test.py index da55162dc72..7b75797b5a2 100644 --- a/tests/integration/test_parts_delete_zookeeper/test.py +++ b/tests/integration/test_parts_delete_zookeeper/test.py @@ -61,11 +61,15 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): node1.query("TRUNCATE TABLE test_table") - total_parts = node1.query("SELECT count(*) from system.parts where table = 'test_table'") + total_parts = node1.query( + "SELECT count(*) from system.parts where table = 'test_table'" + ) assert total_parts == "0\n" or total_parts == "1\n" assert ( - node1.query("SELECT count(*) from system.parts where table = 'test_table' and active = 1") + node1.query( + "SELECT count(*) from system.parts where table = 'test_table' and active = 1" + ) == "0\n" ) From 155bdfcf0dabf10150d5086cf6d5c86cae425d8b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 4 Nov 2023 22:48:29 +0100 Subject: [PATCH 34/80] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/integration/test_parts_delete_zookeeper/test.py | 3 +++ tests/queries/0_stateless/01825_type_json_1.reference | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c627d01809b..216e4e303fe 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp 
@@ -1365,7 +1365,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); ActiveDataPartSet empty_unexpected_parts_set(format_version); - for (auto & part : parts) + for (const auto & part : parts) { if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name)) continue; diff --git a/tests/integration/test_parts_delete_zookeeper/test.py b/tests/integration/test_parts_delete_zookeeper/test.py index 7b75797b5a2..d7b5fe1cb57 100644 --- a/tests/integration/test_parts_delete_zookeeper/test.py +++ b/tests/integration/test_parts_delete_zookeeper/test.py @@ -73,6 +73,9 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): == "0\n" ) + node1.query("DETACH TABLE test_table SYNC") + node1.query("ATTACH TABLE test_table") + node1.query( "INSERT INTO test_table VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)" ) diff --git a/tests/queries/0_stateless/01825_type_json_1.reference b/tests/queries/0_stateless/01825_type_json_1.reference index 3526e80d3d7..857c624fb9b 100644 --- a/tests/queries/0_stateless/01825_type_json_1.reference +++ b/tests/queries/0_stateless/01825_type_json_1.reference @@ -24,4 +24,4 @@ all_5_5_0 data Tuple(name String, value Float64) all_4_4_0 data Tuple(name String, value Int16) all_5_5_0 data Tuple(name String, value Float64) all_6_6_0 data Tuple(name String, value String) -all_4_6_4 data Tuple(name String, value String) +all_4_6_1 data Tuple(name String, value String) From 1e89e7a6b236b8d1ac21b2174dd680d364d040c1 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 5 Nov 2023 09:38:08 +0100 Subject: [PATCH 35/80] Fix using table shared id during backup and improve logs. --- src/Storages/StorageReplicatedMergeTree.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 069ed20c730..1196aa29b0a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -186,6 +186,7 @@ namespace ErrorCodes extern const int NOT_INITIALIZED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int TABLE_IS_DROPPED; + extern const int CANNOT_BACKUP_TABLE; } namespace ActionLocks @@ -9965,8 +9966,15 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer applyMetadataChangesToCreateQuery(create_query, adjusted_metadata); /// Check that tryGetTableSharedIDFromCreateQuery() works for this storage. 
- if (tryGetTableSharedIDFromCreateQuery(*create_query, getContext()) != getTableSharedID()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} has its shared ID to be different from one from the create query"); + auto actual_table_shared_id = getTableSharedID(); + auto expected_table_shared_id = tryGetTableSharedIDFromCreateQuery(*create_query, getContext()); + if (actual_table_shared_id != expected_table_shared_id) + { + throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Table {} has its shared ID different from one from the create query: " + "actual shared id = {}, expected shared id = {}, create query = {}", + getStorageID().getNameForLogs(), actual_table_shared_id, expected_table_shared_id.value_or("nullopt"), + create_query); + } } void StorageReplicatedMergeTree::backupData( From 60fa1c3d26057d569914f0a112d76ed928bb48b8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 3 Nov 2023 19:36:18 +0000 Subject: [PATCH 36/80] Cosmetics --- src/Functions/formatQuery.cpp | 63 ++++++++++--------- .../0_stateless/02882_formatQuery.reference | 13 ++-- .../queries/0_stateless/02882_formatQuery.sql | 29 ++++++--- 3 files changed, 60 insertions(+), 45 deletions(-) diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index f4cb937eed4..7bfae36527c 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -15,7 +15,13 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -template +enum class OutputFormatting +{ + SingleLine, + MultiLine +}; + +template class FunctionFormatQuery : public IFunction { public: @@ -27,48 +33,40 @@ public: } FunctionFormatQuery(size_t max_query_size_, size_t max_parser_depth_) - : max_query_size(max_query_size_), max_parser_depth(max_parser_depth_) + : max_query_size(max_query_size_) + , max_parser_depth(max_parser_depth_) { } String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors mandatory_args{{"query", &isString, nullptr, "String"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args); + FunctionArgumentDescriptors args{ + {"query", &isString, nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args); + return arguments[0].type; } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnPtr column = arguments[0].column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + const ColumnPtr col_query = arguments[0].column; + if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets()); return col_res; } else - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", 
col_query->getName(), getName()); } private: - void formatQueryImpl(const char * begin, const char * end, ColumnString::Chars & output) const - { - ParserQuery parser{end}; - auto ast = parseQuery(parser, begin, end, {}, max_query_size, max_parser_depth); - WriteBufferFromVector buf(output, AppendModeTag{}); - formatAST(*ast, buf, /* hilite */ false, /* one_line */ one_line); - buf.finalize(); - } void formatVector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, @@ -79,18 +77,25 @@ private: res_offsets.resize(size); res_data.reserve(data.size()); - size_t prev_in_offset = 0; + size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { - const auto * begin = reinterpret_cast(&data[prev_in_offset]); + const char * begin = reinterpret_cast(&data[prev_offset]); const char * end = begin + offsets[i] - 1; - formatQueryImpl(begin, end, res_data); + + ParserQuery parser(end); + auto ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); + WriteBufferFromVector buf(res_data, AppendModeTag{}); + formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + buf.finalize(); + res_offsets[i] = res_data.size() + 1; - prev_in_offset = offsets[i]; + prev_offset = offsets[i]; } } - size_t max_query_size; - size_t max_parser_depth; + + const size_t max_query_size; + const size_t max_parser_depth; }; struct NameFormatQuery @@ -105,7 +110,7 @@ struct NameFormatQuerySingleLine REGISTER_FUNCTION(formatQuery) { - factory.registerFunction>(FunctionDocumentation{ + factory.registerFunction>(FunctionDocumentation{ .description = "Returns a formatted, possibly multi-line, version of the given SQL query.\n[example:multiline]", .syntax = "formatQuery(query)", .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, @@ -123,7 +128,7 @@ REGISTER_FUNCTION(formatQuery) REGISTER_FUNCTION(formatQuerySingleLine) { - factory.registerFunction>(FunctionDocumentation{ + factory.registerFunction>(FunctionDocumentation{ .description = "Like formatQuery() but the returned formatted string contains no line breaks.\n[example:multiline]", .syntax = "formatQuerySingleLine(query)", .arguments = {{"query", "The SQL query to be formatted. 
[String](../../sql-reference/data-types/string.md)"}}, diff --git a/tests/queries/0_stateless/02882_formatQuery.reference b/tests/queries/0_stateless/02882_formatQuery.reference index fd84a9505b1..2842e782e63 100644 --- a/tests/queries/0_stateless/02882_formatQuery.reference +++ b/tests/queries/0_stateless/02882_formatQuery.reference @@ -1,19 +1,18 @@ +-- formatQuery SELECT 1 SELECT 1 SELECT 1 -SELECT 1 +1 1 INSERT INTO tab FORMAT Values CREATE TABLE default.no_prop_table\n(\n `some_column` UInt64\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 EXPLAIN SYNTAX\nSELECT\n CAST(1, \'INT\'),\n CEIL(1),\n CEILING(1),\n CHAR(49),\n CHAR_LENGTH(\'1\'),\n CHARACTER_LENGTH(\'1\'),\n COALESCE(1),\n CONCAT(\'1\', \'1\'),\n CORR(1, 1),\n COS(1),\n COUNT(1),\n COVAR_POP(1, 1),\n COVAR_SAMP(1, 1),\n DATABASE(),\n SCHEMA(),\n dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')),\n EXP(1),\n FLATTEN([[1]]),\n FLOOR(1),\n FQDN(),\n GREATEST(1),\n IF(1, 1, 1),\n IFNULL(1, 1),\n LCASE(\'A\'),\n LEAST(1),\n LENGTH(\'1\'),\n LN(1),\n LOCATE(\'1\', \'1\'),\n LOG(1),\n LOG10(1),\n LOG2(1),\n LOWER(\'A\'),\n MAX(1),\n MID(\'123\', 1, 1),\n MIN(1),\n MOD(1, 1),\n NOT 1,\n NOW(),\n NOW64(),\n NULLIF(1, 1),\n PI(),\n position(\'123\', \'2\'),\n POW(1, 1),\n POWER(1, 1),\n RAND(),\n REPLACE(\'1\', \'1\', \'2\'),\n REVERSE(\'123\'),\n ROUND(1),\n SIN(1),\n SQRT(1),\n STDDEV_POP(1),\n STDDEV_SAMP(1),\n SUBSTR(\'123\', 2),\n substring(\'123\', 2),\n SUM(1),\n TAN(1),\n TANH(1),\n TRUNC(1),\n TRUNCATE(1),\n UCASE(\'A\'),\n UPPER(\'A\'),\n USER(),\n VAR_POP(1),\n VAR_SAMP(1),\n WEEK(toDate(\'2020-10-24\')),\n YEARWEEK(toDate(\'2020-10-24\'))\nFORMAT TSVRaw +-- formatQuerySingleLine +SELECT 1 +SELECT 1 +SELECT 1 1 -formatQuerySingleLine -SELECT 1 -SELECT 1 -SELECT 1 -SELECT 1 1 INSERT INTO tab FORMAT Values CREATE TABLE default.no_prop_table (`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192 EXPLAIN SYNTAX SELECT CAST(1, \'INT\'), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT 1, NOW(), NOW64(), NULLIF(1, 1), PI(), position(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), substring(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) FORMAT TSVRaw -1 diff --git a/tests/queries/0_stateless/02882_formatQuery.sql b/tests/queries/0_stateless/02882_formatQuery.sql index 767283552d5..c31b6e32812 100644 --- a/tests/queries/0_stateless/02882_formatQuery.sql +++ b/tests/queries/0_stateless/02882_formatQuery.sql @@ -1,21 +1,32 @@ -SELECT formatQuery('select 1;'); -SELECT formatQuery('select 1'); +SELECT '-- formatQuery'; + SELECT formatQuery('SELECT 1;'); SELECT formatQuery('SELECT 1'); -SELECT formatQuery('select 1;') == formatQuery('SeLecT 1'); +SELECT formatQuery('SeLeCt 1;'); +SELECT formatQuery('select 1;') == formatQuery('SeLeCt 1'); +SELECT 
normalizedQueryHash(formatQuery('select 1')) = normalizedQueryHash(formatQuery('SELECT 1')); + SELECT formatQuery('INSERT INTO tab VALUES (\'\') (\'test\')'); SELECT formatQuery('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuery('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); -SELECT normalizedQueryHash(formatQuery('select 1')) = normalizedQueryHash(formatQuery('SELECT 1')); + +-- negative tests +SELECT formatQuery(''); -- { serverError SYNTAX_ERROR } SELECT formatQuery('SEECTwrong'); -- { serverError SYNTAX_ERROR } -SELECT 'formatQuerySingleLine'; -SELECT formatQuerySingleLine('select 1;'); -SELECT formatQuerySingleLine('select 1'); + +SELECT '-- formatQuerySingleLine'; + SELECT formatQuerySingleLine('SELECT 1;'); SELECT formatQuerySingleLine('SELECT 1'); -SELECT formatQuerySingleLine('select 1;') == formatQuerySingleLine('SeLecT 1'); +SELECT formatQuerySingleLine('SeLeCt 1;'); +SELECT formatQuerySingleLine('select 1;') == formatQuerySingleLine('SeLeCt 1'); +SELECT normalizedQueryHash(formatQuerySingleLine('select 1')) = normalizedQueryHash(formatQuerySingleLine('SELECT 1')); + SELECT formatQuerySingleLine('INSERT INTO tab VALUES (\'\') (\'test\')'); + SELECT formatQuerySingleLine('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuerySingleLine('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); -SELECT normalizedQueryHash(formatQuerySingleLine('select 1')) = normalizedQueryHash(formatQuerySingleLine('SELECT 1')); + +-- negative tests +SELECT formatQuerySingleLine(''); 
-- { serverError SYNTAX_ERROR } SELECT formatQuerySingleLine('SEECTwrong'); -- { serverError SYNTAX_ERROR } From 38e9793cd7e425b2e864d9d98d982335e0a18b8e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 3 Nov 2023 22:48:52 +0000 Subject: [PATCH 37/80] Fix junk in formatQuery() --- src/Functions/formatQuery.cpp | 29 ++++++++++++++----- .../0_stateless/02882_formatQuery.reference | 6 ++++ .../queries/0_stateless/02882_formatQuery.sql | 17 +++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 7bfae36527c..a12bfd58cc8 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -75,23 +75,38 @@ private: { const size_t size = offsets.size(); res_offsets.resize(size); - res_data.reserve(data.size()); + res_data.resize(data.size()); size_t prev_offset = 0; + size_t res_data_size = 0; + for (size_t i = 0; i < size; ++i) { const char * begin = reinterpret_cast(&data[prev_offset]); - const char * end = begin + offsets[i] - 1; + const char * end = begin + offsets[i] - prev_offset - 1; ParserQuery parser(end); auto ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); - WriteBufferFromVector buf(res_data, AppendModeTag{}); - formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); - buf.finalize(); - res_offsets[i] = res_data.size() + 1; + WriteBufferFromOwnString buf; + formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + auto formatted = buf.stringView(); + + const size_t res_data_new_size = res_data_size + formatted.size() + 1; + if (res_data_new_size > res_data.size()) + res_data.resize(2 * res_data_new_size); + + memcpy(&res_data[res_data_size], formatted.begin(), formatted.size()); + res_data_size += formatted.size(); + + res_data[res_data_size] = '\0'; + res_data_size += 1; + + res_offsets[i] = res_data_size; prev_offset = offsets[i]; } + + res_data.resize(res_data_size); } const size_t max_query_size; diff --git a/tests/queries/0_stateless/02882_formatQuery.reference b/tests/queries/0_stateless/02882_formatQuery.reference index 2842e782e63..4cf704e83d8 100644 --- a/tests/queries/0_stateless/02882_formatQuery.reference +++ b/tests/queries/0_stateless/02882_formatQuery.reference @@ -7,6 +7,9 @@ SELECT 1 INSERT INTO tab FORMAT Values CREATE TABLE default.no_prop_table\n(\n `some_column` UInt64\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 EXPLAIN SYNTAX\nSELECT\n CAST(1, \'INT\'),\n CEIL(1),\n CEILING(1),\n CHAR(49),\n CHAR_LENGTH(\'1\'),\n CHARACTER_LENGTH(\'1\'),\n COALESCE(1),\n CONCAT(\'1\', \'1\'),\n CORR(1, 1),\n COS(1),\n COUNT(1),\n COVAR_POP(1, 1),\n COVAR_SAMP(1, 1),\n DATABASE(),\n SCHEMA(),\n dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')),\n EXP(1),\n FLATTEN([[1]]),\n FLOOR(1),\n FQDN(),\n GREATEST(1),\n IF(1, 1, 1),\n IFNULL(1, 1),\n LCASE(\'A\'),\n LEAST(1),\n LENGTH(\'1\'),\n LN(1),\n LOCATE(\'1\', \'1\'),\n LOG(1),\n LOG10(1),\n LOG2(1),\n LOWER(\'A\'),\n MAX(1),\n MID(\'123\', 1, 1),\n MIN(1),\n MOD(1, 1),\n NOT 1,\n NOW(),\n NOW64(),\n NULLIF(1, 1),\n PI(),\n position(\'123\', \'2\'),\n POW(1, 1),\n POWER(1, 1),\n RAND(),\n REPLACE(\'1\', \'1\', \'2\'),\n REVERSE(\'123\'),\n ROUND(1),\n SIN(1),\n SQRT(1),\n STDDEV_POP(1),\n STDDEV_SAMP(1),\n SUBSTR(\'123\', 2),\n 
substring(\'123\', 2),\n SUM(1),\n TAN(1),\n TANH(1),\n TRUNC(1),\n TRUNCATE(1),\n UCASE(\'A\'),\n UPPER(\'A\'),\n USER(),\n VAR_POP(1),\n VAR_SAMP(1),\n WEEK(toDate(\'2020-10-24\')),\n YEARWEEK(toDate(\'2020-10-24\'))\nFORMAT TSVRaw +1 SELECT 1 SELECT 1 +2 SeLeCt 22 SELECT 22 +3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values -- formatQuerySingleLine SELECT 1 SELECT 1 @@ -16,3 +19,6 @@ SELECT 1 INSERT INTO tab FORMAT Values CREATE TABLE default.no_prop_table (`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192 EXPLAIN SYNTAX SELECT CAST(1, \'INT\'), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT 1, NOW(), NOW64(), NULLIF(1, 1), PI(), position(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), substring(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) FORMAT TSVRaw +1 SELECT 1 SELECT 1 +2 SeLeCt 22 SELECT 22 +3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values diff --git a/tests/queries/0_stateless/02882_formatQuery.sql b/tests/queries/0_stateless/02882_formatQuery.sql index c31b6e32812..a3dab8ccbac 100644 --- a/tests/queries/0_stateless/02882_formatQuery.sql +++ b/tests/queries/0_stateless/02882_formatQuery.sql @@ -1,3 +1,11 @@ +DROP TABLE IF EXISTS all_valid; +CREATE TABLE all_valid (id UInt64, query String) ENGINE=MergeTree ORDER BY id; +INSERT INTO all_valid VALUES (1, 'SELECT 1') (2, 'SeLeCt 22') (3, 'InSerT into TAB values (\'\')'); + +DROP TABLE IF EXISTS some_invalid; +CREATE TABLE some_invalid (id UInt64, query String) ENGINE=MergeTree ORDER BY id; +INSERT INTO some_invalid VALUES (1, 'SELECT 1') (2, 'SeLeCt 2') (3, 'bad 3') (4, 'select 4') (5, 'bad 5') (6, '') (7, 'SELECT 7'); + SELECT '-- formatQuery'; SELECT formatQuery('SELECT 1;'); @@ -10,9 +18,12 @@ SELECT formatQuery('INSERT INTO tab VALUES (\'\') (\'test\')'); SELECT formatQuery('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuery('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), 
UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); +SELECT id, query, formatQuery(query) FROM all_valid ORDER BY id; + -- negative tests SELECT formatQuery(''); -- { serverError SYNTAX_ERROR } SELECT formatQuery('SEECTwrong'); -- { serverError SYNTAX_ERROR } +SELECT id, query, formatQuery(query) FROM some_invalid ORDER BY id; -- { serverError SYNTAX_ERROR } SELECT '-- formatQuerySingleLine'; @@ -27,6 +38,12 @@ SELECT formatQuerySingleLine('INSERT INTO tab VALUES (\'\') (\'test\')'); SELECT formatQuerySingleLine('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuerySingleLine('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); +SELECT id, query, formatQuerySingleLine(query) FROM all_valid ORDER BY id; + -- negative tests SELECT formatQuerySingleLine(''); -- { serverError SYNTAX_ERROR } SELECT formatQuerySingleLine('SEECTwrong'); -- { serverError SYNTAX_ERROR } +SELECT id, query, formatQuerySingleLine(query) FROM some_invalid ORDER BY id; -- { serverError SYNTAX_ERROR } + +DROP TABLE all_valid; +DROP TABLE some_invalid; From 8a64066ee1fdba2b7fd7937416ce40b498889fcd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 3 Nov 2023 20:19:39 +0000 Subject: [PATCH 38/80] + formatQueryOrNull() --- .../functions/other-functions.md | 6 + src/Functions/formatQuery.cpp | 112 ++++++++++++++++-- .../0_stateless/02882_formatQuery.reference | 20 ++++ .../queries/0_stateless/02882_formatQuery.sql | 14 ++- 4 files changed, 134 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 6b092cf384d..35fd5089bf0 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2760,10 +2760,13 @@ message Root Returns a formatted, possibly multi-line, version of the given SQL query. +Throws an exception if the query is not well-formed. To return `NULL` instead, function `formatQueryOrNull()` may be used. + **Syntax** ```sql formatQuery(query) +formatQueryOrNull(query) ``` **Arguments** @@ -2796,10 +2799,13 @@ WHERE (a > 3) AND (b < 3) │ Like formatQuery() but the returned formatted string contains no line breaks. +Throws an exception if the query is not well-formed. To return `NULL` instead, function `formatQuerySingleLineOrNull()` may be used. 
+ **Syntax** ```sql formatQuerySingleLine(query) +formatQuerySingleLineOrNull(query) ``` **Arguments** diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index a12bfd58cc8..9fb8cabffd5 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -21,7 +23,13 @@ enum class OutputFormatting MultiLine }; -template +enum class ErrorHandling +{ + Exception, + Null +}; + +template class FunctionFormatQuery : public IFunction { public: @@ -50,17 +58,30 @@ public: }; validateFunctionArgumentTypes(*this, arguments, args); - return arguments[0].type; + DataTypePtr string_type = std::make_shared(); + if constexpr (error_handling == ErrorHandling::Null) + return std::make_shared(string_type); + else + return string_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr col_query = arguments[0].column; + + ColumnUInt8::MutablePtr col_null_map; + if constexpr (error_handling == ErrorHandling::Null) + col_null_map = ColumnUInt8::create(input_rows_count, 0); + if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets()); - return col_res; + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); + + if constexpr (error_handling == ErrorHandling::Null) + return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); + else + return col_res; } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", col_query->getName(), getName()); @@ -71,7 +92,8 @@ private: const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) const + ColumnString::Offsets & res_offsets, + ColumnUInt8::MutablePtr & res_null_map) const { const size_t size = offsets.size(); res_offsets.resize(size); @@ -86,9 +108,32 @@ private: const char * end = begin + offsets[i] - prev_offset - 1; ParserQuery parser(end); - auto ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); - + ASTPtr ast; WriteBufferFromOwnString buf; + + try + { + ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); + } + catch (...) 
+ { + if constexpr (error_handling == ErrorHandling::Null) + { + res_data[res_data_size] = '\0'; + res_data_size += 1; + + res_offsets[i] = res_data_size; + prev_offset = offsets[i]; + res_null_map->getData()[i] = 1; + continue; + } + else + { + static_assert(error_handling == ErrorHandling::Exception); + throw; + } + } + formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); auto formatted = buf.stringView(); @@ -118,15 +163,25 @@ struct NameFormatQuery static constexpr auto name = "formatQuery"; }; +struct NameFormatQueryOrNull +{ + static constexpr auto name = "formatQueryOrNull"; +}; + struct NameFormatQuerySingleLine { static constexpr auto name = "formatQuerySingleLine"; }; +struct NameFormatQuerySingleLineOrNull +{ + static constexpr auto name = "formatQuerySingleLineOrNull"; +}; + REGISTER_FUNCTION(formatQuery) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Returns a formatted, possibly multi-line, version of the given SQL query.\n[example:multiline]", + factory.registerFunction>(FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Throws in case of a parsing error.\n[example:multiline]", .syntax = "formatQuery(query)", .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", @@ -141,10 +196,28 @@ REGISTER_FUNCTION(formatQuery) .categories{"Other"}}); } +REGISTER_FUNCTION(formatQueryOrNull) +{ + factory.registerFunction>(FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQueryOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT\n" + " a,\n" + " b\n" + "FROM tab\n" + "WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); +} + REGISTER_FUNCTION(formatQuerySingleLine) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Like formatQuery() but the returned formatted string contains no line breaks.\n[example:multiline]", + factory.registerFunction>(FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Throws in case of a parsing error.\n[example:multiline]", .syntax = "formatQuerySingleLine(query)", .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", @@ -154,4 +227,19 @@ REGISTER_FUNCTION(formatQuerySingleLine) "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, .categories{"Other"}}); } + +REGISTER_FUNCTION(formatQuerySingleLineOrNull) +{ + factory.registerFunction>(FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuerySingleLineOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. 
[String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); +} + } diff --git a/tests/queries/0_stateless/02882_formatQuery.reference b/tests/queries/0_stateless/02882_formatQuery.reference index 4cf704e83d8..7907362a881 100644 --- a/tests/queries/0_stateless/02882_formatQuery.reference +++ b/tests/queries/0_stateless/02882_formatQuery.reference @@ -10,6 +10,16 @@ EXPLAIN SYNTAX\nSELECT\n CAST(1, \'INT\'),\n CEIL(1),\n CEILING(1),\n 1 SELECT 1 SELECT 1 2 SeLeCt 22 SELECT 22 3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values +1 SELECT 1 SELECT 1 +2 SeLeCt 22 SELECT 22 +3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values +1 SELECT 1 SELECT 1 +2 SeLeCt 2 SELECT 2 +3 bad 3 \N +4 select 4 SELECT 4 +5 bad 5 \N +6 \N +7 SELECT 7 SELECT 7 -- formatQuerySingleLine SELECT 1 SELECT 1 @@ -22,3 +32,13 @@ EXPLAIN SYNTAX SELECT CAST(1, \'INT\'), CEIL(1), CEILING(1), CHAR(49), CHAR_LENG 1 SELECT 1 SELECT 1 2 SeLeCt 22 SELECT 22 3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values +1 SELECT 1 SELECT 1 +2 SeLeCt 22 SELECT 22 +3 InSerT into TAB values (\'\') INSERT INTO TAB FORMAT Values +1 SELECT 1 SELECT 1 +2 SeLeCt 2 SELECT 2 +3 bad 3 \N +4 select 4 SELECT 4 +5 bad 5 \N +6 \N +7 SELECT 7 SELECT 7 diff --git a/tests/queries/0_stateless/02882_formatQuery.sql b/tests/queries/0_stateless/02882_formatQuery.sql index a3dab8ccbac..c3b3f202c9c 100644 --- a/tests/queries/0_stateless/02882_formatQuery.sql +++ b/tests/queries/0_stateless/02882_formatQuery.sql @@ -18,12 +18,13 @@ SELECT formatQuery('INSERT INTO tab VALUES (\'\') (\'test\')'); SELECT formatQuery('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuery('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); -SELECT id, query, formatQuery(query) FROM all_valid ORDER BY id; - --- negative tests SELECT formatQuery(''); -- { serverError SYNTAX_ERROR } SELECT formatQuery('SEECTwrong'); -- { serverError SYNTAX_ERROR } + +SELECT id, query, formatQuery(query) FROM all_valid ORDER BY id; SELECT id, query, formatQuery(query) FROM some_invalid ORDER BY id; -- { serverError SYNTAX_ERROR } +SELECT id, query, formatQueryOrNull(query) FROM all_valid ORDER BY id; +SELECT id, query, formatQueryOrNull(query) FROM some_invalid ORDER BY id; SELECT '-- formatQuerySingleLine'; @@ -38,12 +39,13 @@ SELECT formatQuerySingleLine('INSERT INTO tab VALUES (\'\') 
(\'test\')'); SELECT formatQuerySingleLine('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192'); SELECT formatQuerySingleLine('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;'); -SELECT id, query, formatQuerySingleLine(query) FROM all_valid ORDER BY id; - --- negative tests SELECT formatQuerySingleLine(''); -- { serverError SYNTAX_ERROR } SELECT formatQuerySingleLine('SEECTwrong'); -- { serverError SYNTAX_ERROR } + +SELECT id, query, formatQuerySingleLine(query) FROM all_valid ORDER BY id; SELECT id, query, formatQuerySingleLine(query) FROM some_invalid ORDER BY id; -- { serverError SYNTAX_ERROR } +SELECT id, query, formatQuerySingleLineOrNull(query) FROM all_valid ORDER BY id; +SELECT id, query, formatQuerySingleLineOrNull(query) FROM some_invalid ORDER BY id; DROP TABLE all_valid; DROP TABLE some_invalid; From 049c811a38cd8bd176fa3d01dbeab86f0e285ad5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 2 Nov 2023 19:15:29 +0000 Subject: [PATCH 39/80] Make type names in SHOW COLUMNS dependent only on connection type --- docs/en/operations/settings/settings.md | 15 +- docs/en/sql-reference/statements/show.md | 2 +- src/Core/Settings.h | 6 +- .../InterpreterShowColumnsQuery.cpp | 6 +- ...w_columns_called_from_clickhouse.reference | 60 +++ ...5_show_columns_called_from_clickhouse.sql} | 24 +- ...2775_show_columns_called_from_mysql.expect | 299 ++++++++++++++ ...5_show_columns_called_from_mysql.reference | 0 ...show_columns_mysql_compatibility.reference | 366 ------------------ 9 files changed, 375 insertions(+), 403 deletions(-) create mode 100644 tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference rename tests/queries/0_stateless/{02775_show_columns_mysql_compatibility.sql => 02775_show_columns_called_from_clickhouse.sql} (67%) create mode 100755 tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect create mode 100644 tests/queries/0_stateless/02775_show_columns_called_from_mysql.reference delete mode 100644 tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8c138b7ea0a..100dcb83f79 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3310,22 +3310,11 @@ Possible values: Default value: `0`. 
-## use_mysql_types_in_show_columns {#use_mysql_types_in_show_columns} - -Show the names of MySQL data types corresponding to ClickHouse data types in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns). - -Possible values: - -- 0 - Show names of native ClickHouse data types. -- 1 - Show names of MySQL data types corresponding to ClickHouse data types. - -Default value: `0`. - ## mysql_map_string_to_text_in_show_columns {#mysql_map_string_to_text_in_show_columns} When enabled, [String](../../sql-reference/data-types/string.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns). -Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled. +Has an effect only when the connection is made through the MySQL wire protocol. - 0 - Use `BLOB`. - 1 - Use `TEXT`. @@ -3336,7 +3325,7 @@ Default value: `0`. When enabled, [FixedString](../../sql-reference/data-types/fixedstring.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns). -Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled. +Has an effect only when the connection is made through the MySQL wire protocol. - 0 - Use `BLOB`. - 1 - Use `TEXT`. diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 6ad9c247d02..029ca4adf3b 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -207,7 +207,7 @@ The optional keyword `FULL` causes the output to include the collation, comment The statement produces a result table with the following structure: - `field` - The name of the column (String) -- `type` - The column data type. If setting `[use_mysql_types_in_show_columns](../../operations/settings/settings.md#use_mysql_types_in_show_columns) = 1` (default: 0), then the equivalent type name in MySQL is shown. (String) +- `type` - The column data type. If the query was made through the MySQL wire protocol, then the equivalent type name in MySQL is shown. (String) - `null` - `YES` if the column data type is Nullable, `NO` otherwise (String) - `key` - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String) - `default` - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String)) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 609ade4cdc0..4ab022209ef 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -208,9 +208,8 @@ class IColumn; M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ - M(Bool, use_mysql_types_in_show_columns, false, "Show native MySQL types in SHOW [FULL] COLUMNS", 0) \ - M(Bool, mysql_map_string_to_text_in_show_columns, false, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \ - M(Bool, mysql_map_fixed_string_to_text_in_show_columns, false, "If enabled, FixedString type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. 
Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \ + M(Bool, mysql_map_string_to_text_in_show_columns, false, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise.", 0) \ + M(Bool, mysql_map_fixed_string_to_text_in_show_columns, false, "If enabled, FixedString type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise.", 0) \ \ M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \ \ @@ -848,6 +847,7 @@ class IColumn; MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \ MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \ MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \ + MAKE_OBSOLETE(M, Bool, use_mysql_types_in_show_columns, false) \ /* moved to config.xml: see also src/Core/ServerSettings.h */ \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \ diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index c8fb64e37f2..9dea0b9a188 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -25,8 +26,10 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() { const auto & query = query_ptr->as(); + ClientInfo::Interface client_interface = getContext()->getClientInfo().interface; + const bool use_mysql_types = (client_interface == ClientInfo::Interface::MYSQL); // connection made through MySQL wire protocol + const auto & settings = getContext()->getSettingsRef(); - const bool use_mysql_types = settings.use_mysql_types_in_show_columns; const bool remap_string_as_text = settings.mysql_map_string_to_text_in_show_columns; const bool remap_fixed_string_as_text = settings.mysql_map_fixed_string_to_text_in_show_columns; @@ -39,7 +42,6 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() if (use_mysql_types) { /// Cheapskate SQL-based mapping from native types to MySQL types, see https://dev.mysql.com/doc/refman/8.0/en/data-types.html - /// Only used with setting 'use_mysql_types_in_show_columns = 1' /// Known issues: /// - Enums are translated to TEXT rewritten_query += fmt::format( diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference new file mode 100644 index 00000000000..de0f151db7d --- /dev/null +++ b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.reference @@ -0,0 +1,60 @@ +a Array(String) NO \N +agg AggregateFunction(uniq, UInt64) NO \N +b Bool NO \N +d Date NO \N +d32 Date32 NO \N +dec128 Decimal(38, 2) NO \N +dec128_native Decimal(35, 30) NO \N +dec128_text Decimal(35, 31) NO \N +dec256 Decimal(76, 2) NO \N +dec256_native Decimal(65, 2) NO \N +dec256_text Decimal(66, 2) NO \N +dec32 Decimal(9, 2) NO \N +dec64 Decimal(18, 2) NO \N +dt DateTime NO \N +dt64 DateTime64(3) NO \N +dt64_3_tz1 DateTime64(3, \'UTC\') NO \N +dt64_3_tz2 DateTime64(3, \'Asia/Shanghai\') NO \N +dt64_6 DateTime64(6, \'UTC\') NO \N +dt64_9 DateTime64(9, \'UTC\') NO \N +dt_tz1 DateTime(\'UTC\') NO \N +dt_tz2 DateTime(\'Europe/Amsterdam\') NO \N +enm Enum8(\'hallo\' = 1, \'welt\' = 2) NO \N +f32 Float32 NO \N +f64 Float64 NO \N +fs FixedString(3) NO \N +i128 Int128 NO \N +i16 
Int16 NO \N +i256 Int256 NO \N +i32 Int32 NO \N +i64 Int64 NO \N +i8 Int8 NO \N +ip4 IPv4 NO \N +ip6 IPv6 NO \N +lfs LowCardinality(FixedString(3)) NO \N +lnfs LowCardinality(Nullable(FixedString(3))) YES \N +lns LowCardinality(Nullable(String)) YES \N +ls LowCardinality(String) NO \N +m Map(Int32, String) NO \N +m_complex Map(Int32, Map(Int32, LowCardinality(Nullable(String)))) NO \N +mpg MultiPolygon NO \N +ndt64 Nullable(DateTime64(3)) YES \N +ndt64_tz Nullable(DateTime64(3, \'Asia/Shanghai\')) YES \N +nested.col1 Array(String) NO \N +nested.col2 Array(UInt32) NO \N +nfs Nullable(FixedString(3)) YES \N +ns Nullable(String) YES \N +o Object(\'json\') NO \N +p Point NO \N +pg Polygon NO \N +r Ring NO \N +s String NO \N +sagg SimpleAggregateFunction(sum, Float64) NO \N +t Tuple(Int32, String, Nullable(String), LowCardinality(String), LowCardinality(Nullable(String)), Tuple(Int32, String)) NO \N +ui128 UInt128 NO \N +ui16 UInt16 NO \N +ui256 UInt256 NO \N +ui32 UInt32 NO \N +ui64 UInt64 NO \N +ui8 UInt8 NO \N +uuid UUID NO \N diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sql b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql similarity index 67% rename from tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sql rename to tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql index e447dee47ed..89073bd2943 100644 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sql +++ b/tests/queries/0_stateless/02775_show_columns_called_from_clickhouse.sql @@ -2,7 +2,11 @@ -- no-fasttest: json type needs rapidjson library, geo types need s2 geometry -- no-parallel: can't provide currentDatabase() to SHOW COLUMNS --- Tests setting 'use_mysql_types_in_show_columns' in SHOW COLUMNS and SELECTs on system.columns +-- Tests the output of SHOW COLUMNS when called through the ClickHouse protocol. 
+ +-- ----------------------------------------------------------------------------------- +-- Please keep this test in-sync with 02775_show_columns_called_through_mysql.sql +-- ----------------------------------------------------------------------------------- DROP TABLE IF EXISTS tab; @@ -72,22 +76,6 @@ CREATE TABLE tab lnfs LowCardinality(Nullable(FixedString(3))), ) ENGINE Memory; -SELECT '-- SHOW COLUMNS with use_mysql_types_in_show_columns = 0'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 0; - -SELECT '-- SHOW COLUMNS with use_mysql_types_in_show_columns = 1'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 1; - -SELECT '-- SHOW COLUMNS with mysql_map_string_to_text_in_show_columns = 1'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 1, mysql_map_string_to_text_in_show_columns=1; - -SELECT '-- SHOW COLUMNS with mysql_map_fixed_string_to_text_in_show_columns = 1'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 1, mysql_map_fixed_string_to_text_in_show_columns=1; - -SELECT '-- SHOW COLUMNS with mysql_map_string_to_text_in_show_columns = 1 and without use_mysql_types_in_show_columns'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 0, mysql_map_string_to_text_in_show_columns=1; - -SELECT '-- SHOW COLUMNS with mysql_map_fixed_string_to_text_in_show_columns = 1 and without use_mysql_types_in_show_columns'; -SHOW COLUMNS FROM tab SETTINGS use_mysql_types_in_show_columns = 0, mysql_map_fixed_string_to_text_in_show_columns=1; +SHOW COLUMNS FROM tab; DROP TABLE tab; diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect new file mode 100755 index 00000000000..bef5bd10ff3 --- /dev/null +++ b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.expect @@ -0,0 +1,299 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest, no-parallel +# no-fasttest: requires mysql client, rapidjson and s2geometry +# no-parallel: can't provide currentDatabase() to SHOW COLUMNS + +# Tests the output of SHOW COLUMNS when called through the MySQL protocol. 
+ +# ----------------------------------------------------------------------------------- +# Please keep this test in-sync with 02775_show_columns_called_through_clickhouse.sql +# ----------------------------------------------------------------------------------- + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 +log_user 0 +set timeout 60 +match_max 100000 +expect_after { + # Do not ignore eof from expect + -i $any_spawn_id eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + -i $any_spawn_id timeout { exit 1 } +} +spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" +expect -nocase -re "mysql.*> " + +send -- "DROP TABLE IF EXISTS tab;\r" +expect "Query OK, 0 rows affected" + +send -- "SET allow_suspicious_low_cardinality_types=1;\r" +send -- "SET allow_experimental_object_type=1;\r" + +send -- " +CREATE TABLE tab +( + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + i128 Int128, + i256 Int256, + ui8 UInt8, + ui16 UInt16, + ui32 UInt32, + ui64 UInt64, + ui128 UInt128, + ui256 UInt256, + f32 Float32, + f64 Float64, + dec32 Decimal32(2), + dec64 Decimal64(2), + dec128 Decimal128(2), + dec128_native Decimal(35, 30), + dec128_text Decimal(35, 31), + dec256 Decimal256(2), + dec256_native Decimal(65, 2), + dec256_text Decimal(66, 2), + p Point, + r Ring, + pg Polygon, + mpg MultiPolygon, + b Bool, + s String, + fs FixedString(3), + uuid UUID, + d Date, + d32 Date32, + dt DateTime, + dt_tz1 DateTime('UTC'), + dt_tz2 DateTime('Europe/Amsterdam'), + dt64 DateTime64(3), + dt64_3_tz1 DateTime64(3, 'UTC'), + dt64_3_tz2 DateTime64(3, 'Asia/Shanghai'), + dt64_6 DateTime64(6, 'UTC'), + dt64_9 DateTime64(9, 'UTC'), + enm Enum('hallo' = 1, 'welt' = 2), + agg AggregateFunction(uniq, UInt64), + sagg SimpleAggregateFunction(sum, Double), + a Array(String), + o JSON, + t Tuple(Int32, String, Nullable(String), LowCardinality(String), LowCardinality(Nullable(String)), Tuple(Int32, String)), + m Map(Int32, String), + m_complex Map(Int32, Map(Int32, LowCardinality(Nullable(String)))), + nested Nested (col1 String, col2 UInt32), + ip4 IPv4, + ip6 IPv6, + ns Nullable(String), + nfs Nullable(FixedString(3)), + ndt64 Nullable(DateTime64(3)), + ndt64_tz Nullable(DateTime64(3, 'Asia/Shanghai')), + ls LowCardinality(String), + lfs LowCardinality(FixedString(3)), + lns LowCardinality(Nullable(String)), + lnfs LowCardinality(Nullable(FixedString(3))), +) ENGINE Memory;\r +" + +send -- "SHOW COLUMNS FROM tab;\r" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| field | type | null | key | default | extra |" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| a | TEXT | NO | | NULL | |" +expect -- "| agg | TEXT | NO | | NULL | |" +expect -- "| b | TINYINT | NO | | NULL | |" +expect -- "| d | DATE | NO | | NULL | |" +expect -- "| d32 | DATE | NO | | NULL | |" +expect -- "| dec128 | DECIMAL(38, 2) | NO | | NULL | |" +expect -- "| dec128_native | DECIMAL(35, 30) | NO | | NULL | |" +expect -- "| dec128_text | TEXT | NO | | NULL | |" +expect -- "| dec256 | TEXT | NO | | NULL | |" +expect -- "| dec256_native | DECIMAL(65, 2) | NO | | NULL | |" +expect -- "| dec256_text | TEXT | NO | | NULL | |" +expect -- "| dec32 | DECIMAL(9, 2) | NO | | NULL | |" +expect -- "| dec64 | DECIMAL(18, 2) | NO | | NULL | |" +expect -- "| dt | DATETIME | NO | | NULL | |" +expect -- "| dt64 | DATETIME | NO | | 
NULL | |" +expect -- "| dt64_3_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz2 | DATETIME | NO | | NULL | |" +expect -- "| dt64_6 | DATETIME | NO | | NULL | |" +expect -- "| dt64_9 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz2 | DATETIME | NO | | NULL | |" +expect -- "| enm | TEXT | NO | | NULL | |" +expect -- "| f32 | FLOAT | NO | | NULL | |" +expect -- "| f64 | DOUBLE | NO | | NULL | |" +expect -- "| fs | BLOB | NO | | NULL | |" +expect -- "| i128 | TEXT | NO | | NULL | |" +expect -- "| i16 | SMALLINT | NO | | NULL | |" +expect -- "| i256 | TEXT | NO | | NULL | |" +expect -- "| i32 | INTEGER | NO | | NULL | |" +expect -- "| i64 | BIGINT | NO | | NULL | |" +expect -- "| i8 | TINYINT | NO | | NULL | |" +expect -- "| ip4 | TEXT | NO | | NULL | |" +expect -- "| ip6 | TEXT | NO | | NULL | |" +expect -- "| lfs | BLOB | NO | | NULL | |" +expect -- "| lnfs | BLOB | YES | | NULL | |" +expect -- "| lns | BLOB | YES | | NULL | |" +expect -- "| ls | BLOB | NO | | NULL | |" +expect -- "| m | JSON | NO | | NULL | |" +expect -- "| m_complex | JSON | NO | | NULL | |" +expect -- "| mpg | TEXT | NO | | NULL | |" +expect -- "| ndt64 | DATETIME | YES | | NULL | |" +expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" +expect -- "| nested.col1 | TEXT | NO | | NULL | |" +expect -- "| nested.col2 | TEXT | NO | | NULL | |" +expect -- "| nfs | BLOB | YES | | NULL | |" +expect -- "| ns | BLOB | YES | | NULL | |" +expect -- "| o | JSON | NO | | NULL | |" +expect -- "| p | TEXT | NO | | NULL | |" +expect -- "| pg | TEXT | NO | | NULL | |" +expect -- "| r | TEXT | NO | | NULL | |" +expect -- "| s | BLOB | NO | | NULL | |" +expect -- "| sagg | TEXT | NO | | NULL | |" +expect -- "| t | JSON | NO | | NULL | |" +expect -- "| ui128 | TEXT | NO | | NULL | |" +expect -- "| ui16 | SMALLINT UNSIGNED | NO | | NULL | |" +expect -- "| ui256 | TEXT | NO | | NULL | |" +expect -- "| ui32 | INTEGER UNSIGNED | NO | | NULL | |" +expect -- "| ui64 | BIGINT UNSIGNED | NO | | NULL | |" +expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | |" +expect -- "| uuid | CHAR | NO | | NULL | |" +expect -- "+---------------+-------------------+------+------+---------+-------+" + +send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_string_to_text_in_show_columns=1;\r" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| field | type | null | key | default | extra |" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| a | TEXT | NO | | NULL | |" +expect -- "| agg | TEXT | NO | | NULL | |" +expect -- "| b | TINYINT | NO | | NULL | |" +expect -- "| d | DATE | NO | | NULL | |" +expect -- "| d32 | DATE | NO | | NULL | |" +expect -- "| dec128 | DECIMAL(38, 2) | NO | | NULL | |" +expect -- "| dec128_native | DECIMAL(35, 30) | NO | | NULL | |" +expect -- "| dec128_text | TEXT | NO | | NULL | |" +expect -- "| dec256 | TEXT | NO | | NULL | |" +expect -- "| dec256_native | DECIMAL(65, 2) | NO | | NULL | |" +expect -- "| dec256_text | TEXT | NO | | NULL | |" +expect -- "| dec32 | DECIMAL(9, 2) | NO | | NULL | |" +expect -- "| dec64 | DECIMAL(18, 2) | NO | | NULL | |" +expect -- "| dt | DATETIME | NO | | NULL | |" +expect -- "| dt64 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz2 | DATETIME | NO | | NULL | |" +expect -- "| dt64_6 | DATETIME | NO | | NULL | |" +expect -- "| dt64_9 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz1 | DATETIME | NO | | 
NULL | |" +expect -- "| dt_tz2 | DATETIME | NO | | NULL | |" +expect -- "| enm | TEXT | NO | | NULL | |" +expect -- "| f32 | FLOAT | NO | | NULL | |" +expect -- "| f64 | DOUBLE | NO | | NULL | |" +expect -- "| fs | BLOB | NO | | NULL | |" +expect -- "| i128 | TEXT | NO | | NULL | |" +expect -- "| i16 | SMALLINT | NO | | NULL | |" +expect -- "| i256 | TEXT | NO | | NULL | |" +expect -- "| i32 | INTEGER | NO | | NULL | |" +expect -- "| i64 | BIGINT | NO | | NULL | |" +expect -- "| i8 | TINYINT | NO | | NULL | |" +expect -- "| ip4 | TEXT | NO | | NULL | |" +expect -- "| ip6 | TEXT | NO | | NULL | |" +expect -- "| lfs | BLOB | NO | | NULL | |" +expect -- "| lnfs | BLOB | YES | | NULL | |" +expect -- "| lns | TEXT | YES | | NULL | |" +expect -- "| ls | TEXT | NO | | NULL | |" +expect -- "| m | JSON | NO | | NULL | |" +expect -- "| m_complex | JSON | NO | | NULL | |" +expect -- "| mpg | TEXT | NO | | NULL | |" +expect -- "| ndt64 | DATETIME | YES | | NULL | |" +expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" +expect -- "| nested.col1 | TEXT | NO | | NULL | |" +expect -- "| nested.col2 | TEXT | NO | | NULL | |" +expect -- "| nfs | BLOB | YES | | NULL | |" +expect -- "| ns | TEXT | YES | | NULL | |" +expect -- "| o | JSON | NO | | NULL | |" +expect -- "| p | TEXT | NO | | NULL | |" +expect -- "| pg | TEXT | NO | | NULL | |" +expect -- "| r | TEXT | NO | | NULL | |" +expect -- "| s | TEXT | NO | | NULL | |" +expect -- "| sagg | TEXT | NO | | NULL | |" +expect -- "| t | JSON | NO | | NULL | |" +expect -- "| ui128 | TEXT | NO | | NULL | |" +expect -- "| ui16 | SMALLINT UNSIGNED | NO | | NULL | |" +expect -- "| ui256 | TEXT | NO | | NULL | |" +expect -- "| ui32 | INTEGER UNSIGNED | NO | | NULL | |" +expect -- "| ui64 | BIGINT UNSIGNED | NO | | NULL | |" +expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | |" +expect -- "| uuid | CHAR | NO | | NULL | |" +expect -- "+---------------+-------------------+------+------+---------+-------+" + +send -- "SHOW COLUMNS FROM tab SETTINGS mysql_map_fixed_string_to_text_in_show_columns=1;\r" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| field | type | null | key | default | extra |" +expect -- "+---------------+-------------------+------+------+---------+-------+" +expect -- "| a | TEXT | NO | | NULL | |" +expect -- "| agg | TEXT | NO | | NULL | |" +expect -- "| b | TINYINT | NO | | NULL | |" +expect -- "| d | DATE | NO | | NULL | |" +expect -- "| d32 | DATE | NO | | NULL | |" +expect -- "| dec128 | DECIMAL(38, 2) | NO | | NULL | |" +expect -- "| dec128_native | DECIMAL(35, 30) | NO | | NULL | |" +expect -- "| dec128_text | TEXT | NO | | NULL | |" +expect -- "| dec256 | TEXT | NO | | NULL | |" +expect -- "| dec256_native | DECIMAL(65, 2) | NO | | NULL | |" +expect -- "| dec256_text | TEXT | NO | | NULL | |" +expect -- "| dec32 | DECIMAL(9, 2) | NO | | NULL | |" +expect -- "| dec64 | DECIMAL(18, 2) | NO | | NULL | |" +expect -- "| dt | DATETIME | NO | | NULL | |" +expect -- "| dt64 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt64_3_tz2 | DATETIME | NO | | NULL | |" +expect -- "| dt64_6 | DATETIME | NO | | NULL | |" +expect -- "| dt64_9 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz1 | DATETIME | NO | | NULL | |" +expect -- "| dt_tz2 | DATETIME | NO | | NULL | |" +expect -- "| enm | TEXT | NO | | NULL | |" +expect -- "| f32 | FLOAT | NO | | NULL | |" +expect -- "| f64 | DOUBLE | NO | | NULL | |" +expect -- "| fs | TEXT | NO | | NULL | |" +expect -- "| 
i128 | TEXT | NO | | NULL | |" +expect -- "| i16 | SMALLINT | NO | | NULL | |" +expect -- "| i256 | TEXT | NO | | NULL | |" +expect -- "| i32 | INTEGER | NO | | NULL | |" +expect -- "| i64 | BIGINT | NO | | NULL | |" +expect -- "| i8 | TINYINT | NO | | NULL | |" +expect -- "| ip4 | TEXT | NO | | NULL | |" +expect -- "| ip6 | TEXT | NO | | NULL | |" +expect -- "| lfs | TEXT | NO | | NULL | |" +expect -- "| lnfs | TEXT | YES | | NULL | |" +expect -- "| lns | BLOB | YES | | NULL | |" +expect -- "| ls | BLOB | NO | | NULL | |" +expect -- "| m | JSON | NO | | NULL | |" +expect -- "| m_complex | JSON | NO | | NULL | |" +expect -- "| mpg | TEXT | NO | | NULL | |" +expect -- "| ndt64 | DATETIME | YES | | NULL | |" +expect -- "| ndt64_tz | DATETIME | YES | | NULL | |" +expect -- "| nested.col1 | TEXT | NO | | NULL | |" +expect -- "| nested.col2 | TEXT | NO | | NULL | |" +expect -- "| nfs | TEXT | YES | | NULL | |" +expect -- "| ns | BLOB | YES | | NULL | |" +expect -- "| o | JSON | NO | | NULL | |" +expect -- "| p | TEXT | NO | | NULL | |" +expect -- "| pg | TEXT | NO | | NULL | |" +expect -- "| r | TEXT | NO | | NULL | |" +expect -- "| s | BLOB | NO | | NULL | |" +expect -- "| sagg | TEXT | NO | | NULL | |" +expect -- "| t | JSON | NO | | NULL | |" +expect -- "| ui128 | TEXT | NO | | NULL | |" +expect -- "| ui16 | SMALLINT UNSIGNED | NO | | NULL | |" +expect -- "| ui256 | TEXT | NO | | NULL | |" +expect -- "| ui32 | INTEGER UNSIGNED | NO | | NULL | |" +expect -- "| ui64 | BIGINT UNSIGNED | NO | | NULL | |" +expect -- "| ui8 | TINYINT UNSIGNED | NO | | NULL | |" +expect -- "| uuid | CHAR | NO | | NULL | |" +expect -- "+---------------+-------------------+------+------+---------+-------+" + +send -- "DROP TABLE tab;" + +send -- "quit;\r" +expect eof diff --git a/tests/queries/0_stateless/02775_show_columns_called_from_mysql.reference b/tests/queries/0_stateless/02775_show_columns_called_from_mysql.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference deleted file mode 100644 index e038a3362df..00000000000 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference +++ /dev/null @@ -1,366 +0,0 @@ --- SHOW COLUMNS with use_mysql_types_in_show_columns = 0 -a Array(String) NO \N -agg AggregateFunction(uniq, UInt64) NO \N -b Bool NO \N -d Date NO \N -d32 Date32 NO \N -dec128 Decimal(38, 2) NO \N -dec128_native Decimal(35, 30) NO \N -dec128_text Decimal(35, 31) NO \N -dec256 Decimal(76, 2) NO \N -dec256_native Decimal(65, 2) NO \N -dec256_text Decimal(66, 2) NO \N -dec32 Decimal(9, 2) NO \N -dec64 Decimal(18, 2) NO \N -dt DateTime NO \N -dt64 DateTime64(3) NO \N -dt64_3_tz1 DateTime64(3, \'UTC\') NO \N -dt64_3_tz2 DateTime64(3, \'Asia/Shanghai\') NO \N -dt64_6 DateTime64(6, \'UTC\') NO \N -dt64_9 DateTime64(9, \'UTC\') NO \N -dt_tz1 DateTime(\'UTC\') NO \N -dt_tz2 DateTime(\'Europe/Amsterdam\') NO \N -enm Enum8(\'hallo\' = 1, \'welt\' = 2) NO \N -f32 Float32 NO \N -f64 Float64 NO \N -fs FixedString(3) NO \N -i128 Int128 NO \N -i16 Int16 NO \N -i256 Int256 NO \N -i32 Int32 NO \N -i64 Int64 NO \N -i8 Int8 NO \N -ip4 IPv4 NO \N -ip6 IPv6 NO \N -lfs LowCardinality(FixedString(3)) NO \N -lnfs LowCardinality(Nullable(FixedString(3))) YES \N -lns LowCardinality(Nullable(String)) YES \N -ls LowCardinality(String) NO \N -m Map(Int32, String) NO \N -m_complex Map(Int32, Map(Int32, LowCardinality(Nullable(String)))) NO \N 
-mpg MultiPolygon NO \N -ndt64 Nullable(DateTime64(3)) YES \N -ndt64_tz Nullable(DateTime64(3, \'Asia/Shanghai\')) YES \N -nested.col1 Array(String) NO \N -nested.col2 Array(UInt32) NO \N -nfs Nullable(FixedString(3)) YES \N -ns Nullable(String) YES \N -o Object(\'json\') NO \N -p Point NO \N -pg Polygon NO \N -r Ring NO \N -s String NO \N -sagg SimpleAggregateFunction(sum, Float64) NO \N -t Tuple(Int32, String, Nullable(String), LowCardinality(String), LowCardinality(Nullable(String)), Tuple(Int32, String)) NO \N -ui128 UInt128 NO \N -ui16 UInt16 NO \N -ui256 UInt256 NO \N -ui32 UInt32 NO \N -ui64 UInt64 NO \N -ui8 UInt8 NO \N -uuid UUID NO \N --- SHOW COLUMNS with use_mysql_types_in_show_columns = 1 -a TEXT NO \N -agg TEXT NO \N -b TINYINT NO \N -d DATE NO \N -d32 DATE NO \N -dec128 DECIMAL(38, 2) NO \N -dec128_native DECIMAL(35, 30) NO \N -dec128_text TEXT NO \N -dec256 TEXT NO \N -dec256_native DECIMAL(65, 2) NO \N -dec256_text TEXT NO \N -dec32 DECIMAL(9, 2) NO \N -dec64 DECIMAL(18, 2) NO \N -dt DATETIME NO \N -dt64 DATETIME NO \N -dt64_3_tz1 DATETIME NO \N -dt64_3_tz2 DATETIME NO \N -dt64_6 DATETIME NO \N -dt64_9 DATETIME NO \N -dt_tz1 DATETIME NO \N -dt_tz2 DATETIME NO \N -enm TEXT NO \N -f32 FLOAT NO \N -f64 DOUBLE NO \N -fs BLOB NO \N -i128 TEXT NO \N -i16 SMALLINT NO \N -i256 TEXT NO \N -i32 INTEGER NO \N -i64 BIGINT NO \N -i8 TINYINT NO \N -ip4 TEXT NO \N -ip6 TEXT NO \N -lfs BLOB NO \N -lnfs BLOB YES \N -lns BLOB YES \N -ls BLOB NO \N -m JSON NO \N -m_complex JSON NO \N -mpg TEXT NO \N -ndt64 DATETIME YES \N -ndt64_tz DATETIME YES \N -nested.col1 TEXT NO \N -nested.col2 TEXT NO \N -nfs BLOB YES \N -ns BLOB YES \N -o JSON NO \N -p TEXT NO \N -pg TEXT NO \N -r TEXT NO \N -s BLOB NO \N -sagg TEXT NO \N -t JSON NO \N -ui128 TEXT NO \N -ui16 SMALLINT UNSIGNED NO \N -ui256 TEXT NO \N -ui32 INTEGER UNSIGNED NO \N -ui64 BIGINT UNSIGNED NO \N -ui8 TINYINT UNSIGNED NO \N -uuid CHAR NO \N --- SHOW COLUMNS with mysql_map_string_to_text_in_show_columns = 1 -a TEXT NO \N -agg TEXT NO \N -b TINYINT NO \N -d DATE NO \N -d32 DATE NO \N -dec128 DECIMAL(38, 2) NO \N -dec128_native DECIMAL(35, 30) NO \N -dec128_text TEXT NO \N -dec256 TEXT NO \N -dec256_native DECIMAL(65, 2) NO \N -dec256_text TEXT NO \N -dec32 DECIMAL(9, 2) NO \N -dec64 DECIMAL(18, 2) NO \N -dt DATETIME NO \N -dt64 DATETIME NO \N -dt64_3_tz1 DATETIME NO \N -dt64_3_tz2 DATETIME NO \N -dt64_6 DATETIME NO \N -dt64_9 DATETIME NO \N -dt_tz1 DATETIME NO \N -dt_tz2 DATETIME NO \N -enm TEXT NO \N -f32 FLOAT NO \N -f64 DOUBLE NO \N -fs BLOB NO \N -i128 TEXT NO \N -i16 SMALLINT NO \N -i256 TEXT NO \N -i32 INTEGER NO \N -i64 BIGINT NO \N -i8 TINYINT NO \N -ip4 TEXT NO \N -ip6 TEXT NO \N -lfs BLOB NO \N -lnfs BLOB YES \N -lns TEXT YES \N -ls TEXT NO \N -m JSON NO \N -m_complex JSON NO \N -mpg TEXT NO \N -ndt64 DATETIME YES \N -ndt64_tz DATETIME YES \N -nested.col1 TEXT NO \N -nested.col2 TEXT NO \N -nfs BLOB YES \N -ns TEXT YES \N -o JSON NO \N -p TEXT NO \N -pg TEXT NO \N -r TEXT NO \N -s TEXT NO \N -sagg TEXT NO \N -t JSON NO \N -ui128 TEXT NO \N -ui16 SMALLINT UNSIGNED NO \N -ui256 TEXT NO \N -ui32 INTEGER UNSIGNED NO \N -ui64 BIGINT UNSIGNED NO \N -ui8 TINYINT UNSIGNED NO \N -uuid CHAR NO \N --- SHOW COLUMNS with mysql_map_fixed_string_to_text_in_show_columns = 1 -a TEXT NO \N -agg TEXT NO \N -b TINYINT NO \N -d DATE NO \N -d32 DATE NO \N -dec128 DECIMAL(38, 2) NO \N -dec128_native DECIMAL(35, 30) NO \N -dec128_text TEXT NO \N -dec256 TEXT NO \N -dec256_native DECIMAL(65, 2) NO \N -dec256_text TEXT NO \N -dec32 DECIMAL(9, 2) NO \N -dec64 
DECIMAL(18, 2) NO \N -dt DATETIME NO \N -dt64 DATETIME NO \N -dt64_3_tz1 DATETIME NO \N -dt64_3_tz2 DATETIME NO \N -dt64_6 DATETIME NO \N -dt64_9 DATETIME NO \N -dt_tz1 DATETIME NO \N -dt_tz2 DATETIME NO \N -enm TEXT NO \N -f32 FLOAT NO \N -f64 DOUBLE NO \N -fs TEXT NO \N -i128 TEXT NO \N -i16 SMALLINT NO \N -i256 TEXT NO \N -i32 INTEGER NO \N -i64 BIGINT NO \N -i8 TINYINT NO \N -ip4 TEXT NO \N -ip6 TEXT NO \N -lfs TEXT NO \N -lnfs TEXT YES \N -lns BLOB YES \N -ls BLOB NO \N -m JSON NO \N -m_complex JSON NO \N -mpg TEXT NO \N -ndt64 DATETIME YES \N -ndt64_tz DATETIME YES \N -nested.col1 TEXT NO \N -nested.col2 TEXT NO \N -nfs TEXT YES \N -ns BLOB YES \N -o JSON NO \N -p TEXT NO \N -pg TEXT NO \N -r TEXT NO \N -s BLOB NO \N -sagg TEXT NO \N -t JSON NO \N -ui128 TEXT NO \N -ui16 SMALLINT UNSIGNED NO \N -ui256 TEXT NO \N -ui32 INTEGER UNSIGNED NO \N -ui64 BIGINT UNSIGNED NO \N -ui8 TINYINT UNSIGNED NO \N -uuid CHAR NO \N --- SHOW COLUMNS with mysql_map_string_to_text_in_show_columns = 1 and without use_mysql_types_in_show_columns -a Array(String) NO \N -agg AggregateFunction(uniq, UInt64) NO \N -b Bool NO \N -d Date NO \N -d32 Date32 NO \N -dec128 Decimal(38, 2) NO \N -dec128_native Decimal(35, 30) NO \N -dec128_text Decimal(35, 31) NO \N -dec256 Decimal(76, 2) NO \N -dec256_native Decimal(65, 2) NO \N -dec256_text Decimal(66, 2) NO \N -dec32 Decimal(9, 2) NO \N -dec64 Decimal(18, 2) NO \N -dt DateTime NO \N -dt64 DateTime64(3) NO \N -dt64_3_tz1 DateTime64(3, \'UTC\') NO \N -dt64_3_tz2 DateTime64(3, \'Asia/Shanghai\') NO \N -dt64_6 DateTime64(6, \'UTC\') NO \N -dt64_9 DateTime64(9, \'UTC\') NO \N -dt_tz1 DateTime(\'UTC\') NO \N -dt_tz2 DateTime(\'Europe/Amsterdam\') NO \N -enm Enum8(\'hallo\' = 1, \'welt\' = 2) NO \N -f32 Float32 NO \N -f64 Float64 NO \N -fs FixedString(3) NO \N -i128 Int128 NO \N -i16 Int16 NO \N -i256 Int256 NO \N -i32 Int32 NO \N -i64 Int64 NO \N -i8 Int8 NO \N -ip4 IPv4 NO \N -ip6 IPv6 NO \N -lfs LowCardinality(FixedString(3)) NO \N -lnfs LowCardinality(Nullable(FixedString(3))) YES \N -lns LowCardinality(Nullable(String)) YES \N -ls LowCardinality(String) NO \N -m Map(Int32, String) NO \N -m_complex Map(Int32, Map(Int32, LowCardinality(Nullable(String)))) NO \N -mpg MultiPolygon NO \N -ndt64 Nullable(DateTime64(3)) YES \N -ndt64_tz Nullable(DateTime64(3, \'Asia/Shanghai\')) YES \N -nested.col1 Array(String) NO \N -nested.col2 Array(UInt32) NO \N -nfs Nullable(FixedString(3)) YES \N -ns Nullable(String) YES \N -o Object(\'json\') NO \N -p Point NO \N -pg Polygon NO \N -r Ring NO \N -s String NO \N -sagg SimpleAggregateFunction(sum, Float64) NO \N -t Tuple(Int32, String, Nullable(String), LowCardinality(String), LowCardinality(Nullable(String)), Tuple(Int32, String)) NO \N -ui128 UInt128 NO \N -ui16 UInt16 NO \N -ui256 UInt256 NO \N -ui32 UInt32 NO \N -ui64 UInt64 NO \N -ui8 UInt8 NO \N -uuid UUID NO \N --- SHOW COLUMNS with mysql_map_fixed_string_to_text_in_show_columns = 1 and without use_mysql_types_in_show_columns -a Array(String) NO \N -agg AggregateFunction(uniq, UInt64) NO \N -b Bool NO \N -d Date NO \N -d32 Date32 NO \N -dec128 Decimal(38, 2) NO \N -dec128_native Decimal(35, 30) NO \N -dec128_text Decimal(35, 31) NO \N -dec256 Decimal(76, 2) NO \N -dec256_native Decimal(65, 2) NO \N -dec256_text Decimal(66, 2) NO \N -dec32 Decimal(9, 2) NO \N -dec64 Decimal(18, 2) NO \N -dt DateTime NO \N -dt64 DateTime64(3) NO \N -dt64_3_tz1 DateTime64(3, \'UTC\') NO \N -dt64_3_tz2 DateTime64(3, \'Asia/Shanghai\') NO \N -dt64_6 DateTime64(6, \'UTC\') NO \N -dt64_9 DateTime64(9, 
\'UTC\') NO \N -dt_tz1 DateTime(\'UTC\') NO \N -dt_tz2 DateTime(\'Europe/Amsterdam\') NO \N -enm Enum8(\'hallo\' = 1, \'welt\' = 2) NO \N -f32 Float32 NO \N -f64 Float64 NO \N -fs FixedString(3) NO \N -i128 Int128 NO \N -i16 Int16 NO \N -i256 Int256 NO \N -i32 Int32 NO \N -i64 Int64 NO \N -i8 Int8 NO \N -ip4 IPv4 NO \N -ip6 IPv6 NO \N -lfs LowCardinality(FixedString(3)) NO \N -lnfs LowCardinality(Nullable(FixedString(3))) YES \N -lns LowCardinality(Nullable(String)) YES \N -ls LowCardinality(String) NO \N -m Map(Int32, String) NO \N -m_complex Map(Int32, Map(Int32, LowCardinality(Nullable(String)))) NO \N -mpg MultiPolygon NO \N -ndt64 Nullable(DateTime64(3)) YES \N -ndt64_tz Nullable(DateTime64(3, \'Asia/Shanghai\')) YES \N -nested.col1 Array(String) NO \N -nested.col2 Array(UInt32) NO \N -nfs Nullable(FixedString(3)) YES \N -ns Nullable(String) YES \N -o Object(\'json\') NO \N -p Point NO \N -pg Polygon NO \N -r Ring NO \N -s String NO \N -sagg SimpleAggregateFunction(sum, Float64) NO \N -t Tuple(Int32, String, Nullable(String), LowCardinality(String), LowCardinality(Nullable(String)), Tuple(Int32, String)) NO \N -ui128 UInt128 NO \N -ui16 UInt16 NO \N -ui256 UInt256 NO \N -ui32 UInt32 NO \N -ui64 UInt64 NO \N -ui8 UInt8 NO \N -uuid UUID NO \N From ef30e6723d0cb9f9d0f4f4b2fb107df74d6d6866 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 6 Sep 2023 11:28:53 +0800 Subject: [PATCH 40/80] bug fix csv read while end of line is not crlf --- docs/en/operations/settings/settings-formats.md | 6 ++++++ src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 3 ++- src/Formats/FormatSettings.h | 3 ++- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 8 ++++---- src/Processors/Formats/Impl/CSVRowOutputFormat.cpp | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index bb59402079e..266f8f8bd66 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -897,6 +897,12 @@ Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). Disabled by default. +### input_format_csv_crlf_end_of_line {#input_format_csv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). + +Disabled by default. + ### input_format_csv_enum_as_number {#input_format_csv_enum_as_number} When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 609ade4cdc0..c35393ba353 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -884,6 +884,7 @@ class IColumn; M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ + M(Bool, input_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \ M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. 
Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted.)", 0) \ M(Bool, input_format_skip_unknown_fields, true, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d51ea9ad2d0..168a4dc48bb 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -58,7 +58,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; - format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; + format_settings.csv.crlf_end_of_line_for_output = settings.output_format_csv_crlf_end_of_line; + format_settings.csv.crlf_end_of_line_for_input = settings.input_format_csv_crlf_end_of_line; format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 382f8b7173a..fe4c9694ec9 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -149,7 +149,8 @@ struct FormatSettings bool allow_single_quotes = true; bool allow_double_quotes = true; bool empty_as_default = false; - bool crlf_end_of_line = false; + bool crlf_end_of_line_for_output = false; + bool crlf_end_of_line_for_input = false; bool enum_as_number = false; bool arrays_as_nested_csv = false; String null_representation = "\\N"; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index baaff8b497b..2e6f6e5e0e2 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -177,7 +177,7 @@ void CSVFormatReader::skipRow() } } -static void skipEndOfLine(ReadBuffer & in) +static void skipEndOfLine(ReadBuffer & in, bool crlf_end_of_line) { /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic) @@ -192,7 +192,7 @@ static void skipEndOfLine(ReadBuffer & in) ++in.position(); if (!in.eof() && *in.position() == '\n') ++in.position(); - else + else if (crlf_end_of_line) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)." 
" Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r."); @@ -258,7 +258,7 @@ void CSVFormatReader::skipRowEndDelimiter() if (buf->eof()) return; - skipEndOfLine(*buf); + skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input); } void CSVFormatReader::skipHeaderRow() @@ -343,7 +343,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return false; } - skipEndOfLine(*buf); + skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input); return true; } diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp index 304e877aae9..965a339887d 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp @@ -56,7 +56,7 @@ void CSVRowOutputFormat::writeFieldDelimiter() void CSVRowOutputFormat::writeRowEndDelimiter() { - if (format_settings.csv.crlf_end_of_line) + if (format_settings.csv.crlf_end_of_line_for_output) writeChar('\r', out); writeChar('\n', out); } From 2a50daf5dde6c312cbc2df0b69c363807e9dd48f Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Fri, 27 Oct 2023 15:43:44 +0800 Subject: [PATCH 41/80] Allow cr at end of csv line --- docs/en/operations/settings/settings-formats.md | 4 ++-- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 4 ++-- src/Formats/FormatSettings.h | 4 ++-- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 8 ++++---- src/Processors/Formats/Impl/CSVRowOutputFormat.cpp | 2 +- .../02891_input_csv_cr_end_of_line.reference | 3 +++ .../0_stateless/02891_input_csv_cr_end_of_line.sh | 13 +++++++++++++ .../data_csv/csv_with_cr_end_of_line.csv | 2 ++ 9 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02891_input_csv_cr_end_of_line.reference create mode 100644 tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh create mode 100644 tests/queries/0_stateless/data_csv/csv_with_cr_end_of_line.csv diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 266f8f8bd66..344e6dda680 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -897,9 +897,9 @@ Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). Disabled by default. -### input_format_csv_crlf_end_of_line {#input_format_csv_crlf_end_of_line} +### input_format_csv_allow_cr_end_of_line {#input_format_csv_allow_cr_end_of_line} -Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). +If it is set true, CR(\\r) will be allowed at end of line not followed by LF(\\n) Disabled by default. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c35393ba353..06438fa5389 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -884,7 +884,7 @@ class IColumn; M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ - M(Bool, input_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n", 0) \ + M(Bool, input_format_csv_allow_cr_end_of_line, false, "If it is set true, \\r will be allowed at end of line not followed by \\n", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \ M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted.)", 0) \ M(Bool, input_format_skip_unknown_fields, true, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 168a4dc48bb..7fb355b6c43 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -58,8 +58,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; - format_settings.csv.crlf_end_of_line_for_output = settings.output_format_csv_crlf_end_of_line; - format_settings.csv.crlf_end_of_line_for_input = settings.input_format_csv_crlf_end_of_line; + format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; + format_settings.csv.allow_cr_end_of_line = settings.input_format_csv_allow_cr_end_of_line; format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index fe4c9694ec9..9f99a47d4d5 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -149,8 +149,8 @@ struct FormatSettings bool allow_single_quotes = true; bool allow_double_quotes = true; bool empty_as_default = false; - bool crlf_end_of_line_for_output = false; - bool crlf_end_of_line_for_input = false; + bool crlf_end_of_line = false; + bool allow_cr_end_of_line = false; bool enum_as_number = false; bool arrays_as_nested_csv = false; String null_representation = "\\N"; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 2e6f6e5e0e2..9ea42de3d32 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -177,7 +177,7 @@ void CSVFormatReader::skipRow() } } -static void skipEndOfLine(ReadBuffer & in, bool crlf_end_of_line) +static void skipEndOfLine(ReadBuffer & in, bool allow_cr_end_of_line) { /// \n (Unix) or \r\n (DOS/Windows) 
or \n\r (Mac OS Classic) @@ -192,7 +192,7 @@ static void skipEndOfLine(ReadBuffer & in, bool crlf_end_of_line) ++in.position(); if (!in.eof() && *in.position() == '\n') ++in.position(); - else if (crlf_end_of_line) + else if (!allow_cr_end_of_line) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)." " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r."); @@ -258,7 +258,7 @@ void CSVFormatReader::skipRowEndDelimiter() if (buf->eof()) return; - skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input); + skipEndOfLine(*buf, format_settings.csv.allow_cr_end_of_line); } void CSVFormatReader::skipHeaderRow() @@ -343,7 +343,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return false; } - skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input); + skipEndOfLine(*buf, format_settings.csv.allow_cr_end_of_line); return true; } diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp index 965a339887d..304e877aae9 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp @@ -56,7 +56,7 @@ void CSVRowOutputFormat::writeFieldDelimiter() void CSVRowOutputFormat::writeRowEndDelimiter() { - if (format_settings.csv.crlf_end_of_line_for_output) + if (format_settings.csv.crlf_end_of_line) writeChar('\r', out); writeChar('\n', out); } diff --git a/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.reference b/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.reference new file mode 100644 index 00000000000..9a9f63dc0a5 --- /dev/null +++ b/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.reference @@ -0,0 +1,3 @@ +A 110 208819249 +B 112 208819248 +C 123 783434434 diff --git a/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh b/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh new file mode 100644 index 00000000000..1e2f647fae3 --- /dev/null +++ b/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a String, b String, c String) engine=MergeTree order by a" +cat $CURDIR/data_csv/csv_with_cr_end_of_line.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_allow_cr_end_of_line=true FORMAT CSV" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_cr_end_of_line.csv b/tests/queries/0_stateless/data_csv/csv_with_cr_end_of_line.csv new file mode 100644 index 00000000000..077ca2c84c5 --- /dev/null +++ b/tests/queries/0_stateless/data_csv/csv_with_cr_end_of_line.csv @@ -0,0 +1,2 @@ +A,110,208819249 +B,112,208819248 C,123,783434434 From 91f17cc2683ee77770c27c9bb46533ba84abee48 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 6 Nov 2023 14:09:12 +0800 Subject: [PATCH 42/80] ci fix --- tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh diff --git a/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh b/tests/queries/0_stateless/02891_input_csv_cr_end_of_line.sh old mode 100644 new mode 100755 From 0ecbe54708f950372dc140c215f896cae44aa743 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 6 Nov 2023 08:55:09 +0000 Subject: [PATCH 43/80] Ensure sufficient size in exceptionn case --- src/Functions/formatQuery.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 9fb8cabffd5..2f6bc6f9903 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -119,12 +119,18 @@ private: { if constexpr (error_handling == ErrorHandling::Null) { + const size_t res_data_new_size = res_data_size + 1; + if (res_data_new_size > res_data.size()) + res_data.resize(2 * res_data_new_size); + res_data[res_data_size] = '\0'; res_data_size += 1; res_offsets[i] = res_data_size; prev_offset = offsets[i]; + res_null_map->getData()[i] = 1; + continue; } else From 7eabafc8cb10abc41212cb6bcc8ddabcb6ad14cf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 6 Nov 2023 08:57:08 +0000 Subject: [PATCH 44/80] Update expected results of 02888_obsolete_settings --- tests/queries/0_stateless/02888_obsolete_settings.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02888_obsolete_settings.reference b/tests/queries/0_stateless/02888_obsolete_settings.reference index e95b1c7aaed..39a395ad373 100644 --- a/tests/queries/0_stateless/02888_obsolete_settings.reference +++ b/tests/queries/0_stateless/02888_obsolete_settings.reference @@ -45,6 +45,7 @@ query_plan_optimize_projection replication_alter_columns_timeout restore_threads temporary_live_view_timeout +use_mysql_types_in_show_columns -- Obsolete merge tree settings check_delay_period in_memory_parts_enable_wal From fce276c441ee4d3008a4e8887d5f579567032b80 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 6 Nov 2023 09:29:02 +0000 Subject: [PATCH 45/80] Print info while decompressing --- utils/self-extracting-executable/decompressor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index c20c1f0cbb2..1f19a349d65 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ 
b/utils/self-extracting-executable/decompressor.cpp @@ -45,6 +45,7 @@ int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_off std::cerr << "Error (ZSTD):" << decompressed_size << " " << ZSTD_getErrorName(decompressed_size) << std::endl; return 1; } + std::cerr << "." << std::flush; return 0; } @@ -173,7 +174,7 @@ bool isSudo() return geteuid() == 0; } -/// Read data about files and decomrpess them. +/// Read data about files and decompress them. int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask) { /// Read data about output file. @@ -332,6 +333,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress if (0 != munmap(input, info_in.st_size)) perror("munmap"); + + std::cerr << std::endl; return 0; } @@ -440,6 +443,8 @@ int main(int/* argc*/, char* argv[]) return 1; } + std::cerr << "Decompressing the binary" << std::flush; + std::stringstream lock_path; // STYLE_CHECK_ALLOW_STD_STRING_STREAM lock_path << "/tmp/" << name << ".decompression." << inode << ".lock"; int lock = open(lock_path.str().c_str(), O_CREAT | O_RDWR, 0666); From 4fe3d10604d666a751b7f5750f77ef2fb9c4b033 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 6 Nov 2023 11:14:57 +0100 Subject: [PATCH 46/80] Unify all workflows for python unit tests, follow naming best practices --- .github/workflows/backport_branches.yml | 9 +++++++-- .github/workflows/master.yml | 7 ++++++- .github/workflows/pull_request.yml | 4 ++-- ...utoscale_runners_lambda_test.py => test_autoscale.py} | 0 tests/ci/{docker_test.py => test_docker.py} | 0 tests/ci/{git_test.py => test_git.py} | 0 tests/ci/{version_test.py => test_version.py} | 0 7 files changed, 15 insertions(+), 5 deletions(-) rename tests/ci/autoscale_runners_lambda/{autoscale_runners_lambda_test.py => test_autoscale.py} (100%) rename tests/ci/{docker_test.py => test_docker.py} (100%) rename tests/ci/{git_test.py => test_git.py} (100%) rename tests/ci/{version_test.py => test_version.py} (100%) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index eb4c29130c4..edc3b621d36 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -33,7 +33,12 @@ jobs: - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 -m unittest discover -s . -p '*_test.py' + echo "Testing the main ci directory" + python3 -m unittest discover -s . -p 'test_*.py' + for dir in *_lambda/; do + echo "Testing $dir" + python3 -m unittest discover -s "$dir" -p 'test_*.py' + done DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] needs: CheckLabels @@ -69,7 +74,7 @@ jobs: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64] + needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index a9aa7717add..1cffffbf395 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -19,7 +19,12 @@ jobs: - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 -m unittest discover -s . -p '*_test.py' + echo "Testing the main ci directory" + python3 -m unittest discover -s . 
-p 'test_*.py' + for dir in *_lambda/; do + echo "Testing $dir" + python3 -m unittest discover -s "$dir" -p 'test_*.py' + done DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5937f434135..e93ee2e4f7a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -47,10 +47,10 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" echo "Testing the main ci directory" - python3 -m unittest discover -s . -p '*_test.py' + python3 -m unittest discover -s . -p 'test_*.py' for dir in *_lambda/; do echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p '*_test.py' + python3 -m unittest discover -s "$dir" -p 'test_*.py' done DockerHubPushAarch64: needs: CheckLabels diff --git a/tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py b/tests/ci/autoscale_runners_lambda/test_autoscale.py similarity index 100% rename from tests/ci/autoscale_runners_lambda/autoscale_runners_lambda_test.py rename to tests/ci/autoscale_runners_lambda/test_autoscale.py diff --git a/tests/ci/docker_test.py b/tests/ci/test_docker.py similarity index 100% rename from tests/ci/docker_test.py rename to tests/ci/test_docker.py diff --git a/tests/ci/git_test.py b/tests/ci/test_git.py similarity index 100% rename from tests/ci/git_test.py rename to tests/ci/test_git.py diff --git a/tests/ci/version_test.py b/tests/ci/test_version.py similarity index 100% rename from tests/ci/version_test.py rename to tests/ci/test_version.py From 52850d5a4557dd72a8586130de0df3b1fa725c0f Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Mon, 6 Nov 2023 11:45:17 +0100 Subject: [PATCH 47/80] Update StorageSystemRemoteDataPaths.cpp --- src/Storages/System/StorageSystemRemoteDataPaths.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 9e63fbcf693..b1cd90448ec 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -80,7 +80,6 @@ Pipe StorageSystemRemoteDataPaths::read( col_size->insert(object.bytes_size); col_namespace->insertDefault(); - //col_namespace->insert(common_prefox_for_objects); if (cache) { From cf07fd79fcba3d906d181adc33752edec0a712a9 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 6 Nov 2023 12:00:23 +0100 Subject: [PATCH 48/80] remove unstable test --- tests/integration/test_merge_tree_s3/test.py | 212 ------------------- 1 file changed, 212 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 507f25209a4..3b2f1c0f6a6 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -941,215 +941,3 @@ def test_s3_disk_heavy_write_check_mem(cluster, broken_s3, node_name): assert int(result) > 0.8 * memory check_no_objects_after_drop(cluster, node_name=node_name) - - -def get_memory_usage(node, query_id): - node.query("SYSTEM FLUSH LOGS") - memory_usage = node.query( - "SELECT memory_usage" - " FROM system.query_log" - f" WHERE query_id='{query_id}'" - " AND type='QueryFinish'" - ) - return int(memory_usage) - - -def get_memory_usages(node, query_ids): - node.query("SYSTEM FLUSH LOGS") - result = [] - for query_id in query_ids: - memory_usage = node.query( - "SELECT memory_usage" - " FROM system.query_log" - f" WHERE 
query_id='{query_id}'" - " AND type='QueryFinish'" - ) - result.append(int(memory_usage)) - return result - - -@pytest.mark.parametrize("node_name", ["node"]) -def test_heavy_insert_select_check_memory(cluster, broken_s3, node_name): - node = cluster.instances[node_name] - - node.query( - """ - CREATE TABLE central_query_log - ( - control_plane_id UUID, - pod_id LowCardinality(String), - scrape_ts_microseconds DateTime64(6) CODEC(Delta(8), LZ4), - event_date Date, - event_time DateTime, - payload Array(String), - payload_01 String, - payload_02 String, - payload_03 String, - payload_04 String, - payload_05 String, - payload_06 String, - payload_07 String, - payload_08 String, - payload_09 String, - payload_10 String, - payload_11 String, - payload_12 String, - payload_13 String, - payload_14 String, - payload_15 String, - payload_16 String, - payload_17 String, - payload_18 String, - payload_19 String - ) - ENGINE=MergeTree() - PARTITION BY toYYYYMM(event_date) - ORDER BY (control_plane_id, event_date, pod_id) - SETTINGS - storage_policy='s3' - """ - ) - - node.query("SYSTEM STOP MERGES central_query_log") - - write_count = 2 - write_query_ids = [] - for x in range(write_count): - query_id = f"INSERT_INTO_TABLE_RANDOM_DATA_QUERY_ID_{x}" - write_query_ids.append(query_id) - node.query( - """ - INSERT INTO central_query_log - SELECT - control_plane_id, - pod_id, - toStartOfHour(event_time) + toIntervalSecond(randUniform(0,60)) as scrape_ts_microseconds, - toDate(event_time) as event_date, - event_time, - payload, - payload[1] as payload_01, - payload[2] as payload_02, - payload[3] as payload_03, - payload[4] as payload_04, - payload[5] as payload_05, - payload[6] as payload_06, - payload[7] as payload_07, - payload[8] as payload_08, - payload[9] as payload_09, - payload[10] as payload_10, - payload[11] as payload_11, - payload[12] as payload_12, - payload[13] as payload_13, - payload[14] as payload_14, - payload[15] as payload_15, - payload[16] as payload_16, - payload[17] as payload_17, - payload[18] as payload_18, - payload[19] as payload_19 - FROM - ( - SELECT - control_plane_id, - substring(payload[1], 1, 5) as pod_id, - toDateTime('2022-12-12 00:00:00') - + toIntervalDay(floor(randUniform(0,3))) - + toIntervalHour(floor(randUniform(0,24))) - + toIntervalSecond(floor(randUniform(0,60))) - as event_time, - payload - FROM - generateRandom( - 'control_plane_id UUID, payload Array(String)', - NULL, - 100, - 100 - ) - LIMIT 10000 - ) - SETTINGS - max_insert_block_size=256000000, - min_insert_block_size_rows=1000000, - min_insert_block_size_bytes=256000000 - """, - query_id=query_id, - ) - - memory = 845346116 - for memory_usage, query_id in zip( - get_memory_usages(node, write_query_ids), write_query_ids - ): - assert int(memory_usage) < 1.2 * memory, f"{memory_usage} : {query_id}" - assert int(memory_usage) > 0.8 * memory, f"{memory_usage} : {query_id}" - - broken_s3.setup_slow_answers(minimal_length=1000, timeout=5, count=20) - broken_s3.setup_fake_multpartuploads() - - insert_query_id = f"INSERT_INTO_S3_FUNCTION_QUERY_ID" - node.query( - """ - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test-upload_{_partition_id}.csv.gz', - 'minio', 'minio123', - 'CSV', auto, 'gzip' - ) - PARTITION BY formatDateTime(subtractHours(toDateTime('2022-12-13 00:00:00'), 1),'%Y-%m-%d_%H:00') - WITH toDateTime('2022-12-13 00:00:00') as time_point - SELECT - * - FROM central_query_log - WHERE - event_date >= subtractDays(toDate(time_point), 1) - AND scrape_ts_microseconds >= 
subtractHours(toStartOfHour(time_point), 12) - AND scrape_ts_microseconds < toStartOfDay(time_point) - SETTINGS - s3_max_inflight_parts_for_one_file=1 - """, - query_id=insert_query_id, - ) - - query_id = f"SELECT_QUERY_ID" - total = node.query( - """ - SELECT - count() - FROM central_query_log - """, - query_id=query_id, - ) - assert int(total) == 10000 * write_count - - query_id = f"SELECT_WHERE_QUERY_ID" - selected = node.query( - """ - WITH toDateTime('2022-12-13 00:00:00') as time_point - SELECT - count() - FROM central_query_log - WHERE - event_date >= subtractDays(toDate(time_point), 1) - AND scrape_ts_microseconds >= subtractHours(toStartOfHour(time_point), 12) - AND scrape_ts_microseconds < toStartOfDay(time_point) - """, - query_id=query_id, - ) - assert int(selected) < 4500, selected - assert int(selected) > 2500, selected - - node.query("SYSTEM FLUSH LOGS") - profile_events = node.query( - f""" - SELECT ProfileEvents - FROM system.query_log - WHERE query_id='{insert_query_id}' - AND type='QueryFinish' - """ - ) - - memory_usage = get_memory_usage(node, insert_query_id) - memory = 123507857 - assert int(memory_usage) < 1.2 * memory, f"{memory_usage} {profile_events}" - assert int(memory_usage) > 0.8 * memory, f"{memory_usage} {profile_events}" - - node.query(f"DROP TABLE IF EXISTS central_query_log SYNC") - remove_all_s3_objects(cluster) From 036922e3b4bf47f795508345927a904b0f6d153b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 6 Nov 2023 12:21:13 +0100 Subject: [PATCH 49/80] Update test.py --- tests/integration/test_storage_s3_queue/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index d659126f49a..e18efc044b5 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -672,7 +672,7 @@ def test_multiple_tables_streaming_sync(started_cluster, mode): + get_count(f"{dst_table_name}_3") ) != files_to_generate: info = node.query( - f"SELECT * FROM system.s3queue_log WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" + f"SELECT * FROM system.s3queue WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" ) logging.debug(info) assert False @@ -751,7 +751,7 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): get_count(node, dst_table_name) + get_count(node_2, dst_table_name) ) != files_to_generate: info = node.query( - f"SELECT * FROM system.s3queue_log WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" + f"SELECT * FROM system.s3queue WHERE zookeeper_path like '%{table_name}' ORDER BY file_name FORMAT Vertical" ) logging.debug(info) assert False From b666ca93c7fc1d018d37acba0ca564dcc8f239b6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Nov 2023 14:13:24 +0100 Subject: [PATCH 50/80] Update 02735_system_zookeeper_connection.sql --- .../0_stateless/02735_system_zookeeper_connection.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql index c98134e2f68..48ada633225 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql @@ -10,14 +10,15 @@ ORDER BY tuple(); SET session_timezone = 'UTC'; 
-select name, host, port, index, is_expired, keeper_api_version, (connected_time between yesterday() and now()), +-- NOTE: Durind the query execution, now() can be evaluated a bit earlier than connected_time +select name, host, port, index, is_expired, keeper_api_version, (connected_time between yesterday() and now() + interval 3 seconds), (abs(session_uptime_elapsed_seconds - zookeeperSessionUptime()) < 10), enabled_feature_flags from system.zookeeper_connection where name='default'; -- keeper_api_version will by 0 for auxiliary_zookeeper2, because we fail to get /api_version due to chroot -- I'm not sure if it's a bug or a useful trick to fallback to basic api -- Also, auxiliary zookeeper is created lazily -select name, host, port, index, is_expired, keeper_api_version, (connected_time between yesterday() and now()) +select name, host, port, index, is_expired, keeper_api_version, (connected_time between yesterday() and now() + interval 3 seconds) from system.zookeeper_connection where name!='default'; DROP TABLE IF EXISTS test_zk_connection_table; From ae1dcb525430bf602b6bd28aa1e5b7586b193145 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 6 Nov 2023 09:59:12 +0000 Subject: [PATCH 51/80] Move documentation of string similarity functions to better location --- .../functions/string-functions.md | 80 +++++++++++++++++++ .../02884_string_distance_function.sql | 12 +-- 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 47e16b67643..4df987b5e2a 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1371,6 +1371,86 @@ Result: └──────────────────┘ ``` +## byteHammingDistance + +Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings. + +**Syntax** + +```sql +byteHammingDistance(string1, string2) +``` + +**Examples** + +``` sql +SELECT byteHammingDistance('karolin', 'kathrin'); +``` + +Result: + +``` text +┌─byteHammingDistance('karolin', 'kathrin')─┐ +│ 3 │ +└───────────────────────────────────────────┘ +``` + +Alias: mismatches + +## stringJaccardIndex + +Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings. + +**Syntax** + +```sql +stringJaccardIndex(string1, string2) +``` + +**Examples** + +``` sql +SELECT stringJaccardIndex('clickhouse', 'mouse'); +``` + +Result: + +``` text +┌─stringJaccardIndex('clickhouse', 'mouse')─┐ +│ 0.4 │ +└───────────────────────────────────────────┘ +``` + +## stringJaccardIndexUTF8 + +Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings. + +## editDistance + +Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings. + +**Syntax** + +```sql +editDistance(string1, string2) +``` + +**Examples** + +``` sql +SELECT editDistance('clickhouse', 'mouse'); +``` + +Result: + +``` text +┌─editDistance('clickhouse', 'mouse')─┐ +│ 6 │ +└─────────────────────────────────────┘ +``` + +Alias: levenshteinDistance + ## initcap Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. 
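The `stringJaccardIndex` entry added above defines the index over byte strings; the `0.4` result for `'clickhouse'` vs. `'mouse'` is what you get when the index is taken over the sets of distinct bytes of each argument (4 shared bytes out of 10 distinct bytes overall). A minimal sketch of that reading, for intuition only: the helper name `byteJaccardIndex` is invented here, and this is not the actual implementation, which lives in `src/Functions/FunctionsStringDistance.cpp`.

``` cpp
#include <bitset>
#include <string_view>

/// Jaccard index over the sets of distinct bytes of two strings:
/// |intersection| / |union| of the two byte sets.
double byteJaccardIndex(std::string_view a, std::string_view b)
{
    std::bitset<256> bytes_a;
    std::bitset<256> bytes_b;
    for (unsigned char c : a)
        bytes_a.set(c);
    for (unsigned char c : b)
        bytes_b.set(c);

    const auto intersection = (bytes_a & bytes_b).count();
    const auto union_size = (bytes_a | bytes_b).count();
    return union_size == 0 ? 0.0 : static_cast<double>(intersection) / static_cast<double>(union_size);
}

/// byteJaccardIndex("clickhouse", "mouse") == 0.4, matching the documented example.
```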
diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index 8126cfb5bd9..e3d9051ce5b 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -36,12 +36,12 @@ SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF\xFF\xFF\xF SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x41\xE2\x82\xAC')); SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x9F\x99\x82')); SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF')); -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError 36 } -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError 36 } -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError 36 } -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError 36 } -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError 36 } -SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError 36 } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError BAD_ARGUMENTS } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError BAD_ARGUMENTS } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError BAD_ARGUMENTS } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError BAD_ARGUMENTS } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError BAD_ARGUMENTS } +SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError BAD_ARGUMENTS } SELECT stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'), stringJaccardIndex('😃🌍', '🙃😃🌑'); From 8896134531167bc63dc530c0f86266e77f8320b7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 6 Nov 2023 10:29:29 +0000 Subject: [PATCH 52/80] Cleanup IDataType convenience functions --- .../functions/string-search-functions.md | 76 --------- programs/obfuscator/Obfuscator.cpp | 2 +- .../AggregateFunctionGroupArrayInsertAt.h | 2 +- .../AggregateFunctionQuantile.h | 2 +- src/DataTypes/IDataType.h | 158 +++++------------- src/Functions/FunctionBitTestMany.h | 2 +- .../FunctionGenerateRandomStructure.cpp | 2 +- src/Functions/FunctionsConversion.h | 2 +- .../FunctionsMultiStringFuzzySearch.h | 2 +- src/Functions/FunctionsStringHash.h | 4 +- src/Functions/FunctionsStringSearch.h | 2 +- src/Functions/array/arrayRandomSample.cpp | 2 +- src/Functions/fromDaysSinceYearZero.cpp | 2 +- src/Functions/randomFixedString.cpp | 2 +- src/Functions/toFixedString.h | 2 +- src/Functions/vectorFunctions.cpp | 2 +- src/Functions/ztest.cpp | 2 +- .../Transforms/FillingTransform.cpp | 2 +- 18 files changed, 62 insertions(+), 206 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index d37f417c7b5..1cb71e6f35d 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -681,79 +681,3 @@ Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are U ## hasSubsequenceCaseInsensitiveUTF8 Like 
[hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively. - -## byteHammingDistance - -Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings. - -**Syntax** - -```sql -byteHammingDistance(string1, string2) -``` - -**Examples** - -``` sql -SELECT byteHammingDistance('karolin', 'kathrin'); -``` - -Result: - -``` text -┌─byteHammingDistance('karolin', 'kathrin')─┐ -│ 3 │ -└───────────────────────────────────────────┘ -``` - -Alias: mismatches - -## stringJaccardIndex - -Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings. - -**Syntax** - -```sql -stringJaccardIndex(string1, string2) -``` - -**Examples** - -``` sql -SELECT stringJaccardIndex('clickhouse', 'mouse'); -``` - -Result: - -``` text -┌─stringJaccardIndex('clickhouse', 'mouse')─┐ -│ 0.4 │ -└───────────────────────────────────────────┘ -``` - -## editDistance - -Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings. - -**Syntax** - -```sql -editDistance(string1, string2) -``` - -**Examples** - -``` sql -SELECT editDistance('clickhouse', 'mouse'); -``` - -Result: - -``` text -┌─editDistance('clickhouse', 'mouse')─┐ -│ 6 │ -└─────────────────────────────────────┘ -``` - -Alias: levenshteinDistance diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 15997ec986e..2cb5250cdf2 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1106,7 +1106,7 @@ public: { if (isInteger(data_type)) { - if (isUnsignedInteger(data_type)) + if (isUInt(data_type)) return std::make_unique(seed); else return std::make_unique(seed); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index 439bb613337..023e237ef96 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -84,7 +84,7 @@ public: } } - if (!isUnsignedInteger(arguments[1])) + if (!isUInt(arguments[1])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of aggregate function {} must be unsigned integer.", getName()); if (default_value.isNull()) diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 13320ad90b6..07db655025d 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -238,7 +238,7 @@ public: if constexpr (has_second_arg) { assertBinary(Name::name, types); - if (!isUnsignedInteger(types[1])) + if (!isUInt(types[1])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (weight) for function {} must be unsigned integer, but it has type {}", diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index f334cd9ff24..a1de6ea18a9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -363,6 +363,9 @@ struct WhichDataType constexpr bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); } constexpr bool isInt() const { return isNativeInt() || isInt128() || isInt256(); } + constexpr bool isNativeInteger() const { return isNativeInt() || isNativeUInt(); } + constexpr bool isInteger() const { return isInt() || isUInt(); } + constexpr bool isDecimal32() const { return idx == TypeIndex::Decimal32; } constexpr bool isDecimal64() const { return idx == 
TypeIndex::Decimal64; } constexpr bool isDecimal128() const { return idx == TypeIndex::Decimal128; } @@ -373,6 +376,9 @@ struct WhichDataType constexpr bool isFloat64() const { return idx == TypeIndex::Float64; } constexpr bool isFloat() const { return isFloat32() || isFloat64(); } + constexpr bool isNativeNumber() const { return isNativeInteger() || isFloat(); } + constexpr bool isNumber() const { return isInteger() || isFloat() || isDecimal(); } + constexpr bool isEnum8() const { return idx == TypeIndex::Enum8; } constexpr bool isEnum16() const { return idx == TypeIndex::Enum16; } constexpr bool isEnum() const { return isEnum8() || isEnum16(); } @@ -410,110 +416,60 @@ struct WhichDataType /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) -template -inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } -template -inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } -template -inline bool isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); } -template -inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } -template -inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -template -inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } -template -inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } +template inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); } +template inline bool isUInt16(const T & data_type) { return WhichDataType(data_type).isUInt16(); } +template inline bool isUInt32(const T & data_type) { return WhichDataType(data_type).isUInt32(); } +template inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); } +template inline bool isNativeUInt(const T & data_type) { return WhichDataType(data_type).isNativeUInt(); } +template inline bool isUInt(const T & data_type) { return WhichDataType(data_type).isUInt(); } -template -inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } -template -inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } -template -inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } -template -inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } -template -inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } -template -inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } -template -inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } -template -inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } -template -inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } -template -inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } +template inline bool isInt8(const T & data_type) { return WhichDataType(data_type).isInt8(); } +template inline bool isInt16(const T & data_type) { return WhichDataType(data_type).isInt16(); } +template inline bool isInt32(const T & data_type) { return WhichDataType(data_type).isInt32(); } +template inline bool isInt64(const T & data_type) { return 
WhichDataType(data_type).isInt64(); } +template inline bool isNativeInt(const T & data_type) { return WhichDataType(data_type).isNativeInt(); } +template inline bool isInt(const T & data_type) { return WhichDataType(data_type).isInt(); } -template -inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } +template inline bool isInteger(const T & data_type) { return WhichDataType(data_type).isInteger(); } +template inline bool isNativeInteger(const T & data_type) { return WhichDataType(data_type).isNativeInteger(); } -template -inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); } -template -inline bool isUInt16(const T & data_type) { return WhichDataType(data_type).isUInt16(); } -template -inline bool isUInt32(const T & data_type) { return WhichDataType(data_type).isUInt32(); } -template -inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); } -template -inline bool isNativeUnsignedInteger(const T & data_type) { return WhichDataType(data_type).isNativeUInt(); } -template -inline bool isUnsignedInteger(const T & data_type) { return WhichDataType(data_type).isUInt(); } +template inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } -template -inline bool isInt8(const T & data_type) { return WhichDataType(data_type).isInt8(); } -template -inline bool isInt16(const T & data_type) { return WhichDataType(data_type).isInt16(); } -template -inline bool isInt32(const T & data_type) { return WhichDataType(data_type).isInt32(); } -template -inline bool isInt64(const T & data_type) { return WhichDataType(data_type).isInt64(); } -template -inline bool isInt(const T & data_type) { return WhichDataType(data_type).isInt(); } +template inline bool isFloat(const T & data_type) { return WhichDataType(data_type).isFloat(); } -template -inline bool isInteger(const T & data_type) -{ - WhichDataType which(data_type); - return which.isInt() || which.isUInt(); -} +template inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); } +template inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); } -template -inline bool isFloat(const T & data_type) -{ - WhichDataType which(data_type); - return which.isFloat(); -} +template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } -template -inline bool isNativeInteger(const T & data_type) -{ - WhichDataType which(data_type); - return which.isNativeInt() || which.isNativeUInt(); -} +template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } +template inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } +template inline bool isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); } +template inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } +template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } +template inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } +template inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } +template inline bool isString(const T & data_type) { return WhichDataType(data_type).isString(); } +template inline bool isFixedString(const T & data_type) { 
return WhichDataType(data_type).isFixedString(); } +template inline bool isStringOrFixedString(const T & data_type) { return WhichDataType(data_type).isStringOrFixedString(); } -template -inline bool isNativeNumber(const T & data_type) -{ - WhichDataType which(data_type); - return which.isNativeInt() || which.isNativeUInt() || which.isFloat(); -} +template inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } +template inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } +template inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } +template inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } +template inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } +template inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } +template inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } +template inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } -template -inline bool isNumber(const T & data_type) -{ - WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat() || which.isDecimal(); -} +template inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } template inline bool isColumnedAsNumber(const T & data_type) { WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); + return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); } template @@ -531,24 +487,6 @@ inline bool isColumnedAsDecimalT(const DataType & data_type) return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeToTypeIndex; } -template -inline bool isString(const T & data_type) -{ - return WhichDataType(data_type).isString(); -} - -template -inline bool isFixedString(const T & data_type) -{ - return WhichDataType(data_type).isFixedString(); -} - -template -inline bool isStringOrFixedString(const T & data_type) -{ - return WhichDataType(data_type).isStringOrFixedString(); -} - template inline bool isNotCreatable(const T & data_type) { @@ -567,12 +505,6 @@ inline bool isBool(const DataTypePtr & data_type) return data_type->getName() == "Bool"; } -inline bool isAggregateFunction(const DataTypePtr & data_type) -{ - WhichDataType which(data_type); - return which.isAggregateFunction(); -} - inline bool isNullableOrLowCardinalityNullable(const DataTypePtr & data_type) { return data_type->isNullable() || data_type->isLowCardinalityNullable(); diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 35af3a2a771..71e94b1e71d 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -49,7 +49,7 @@ public: { const auto & pos_arg = arguments[i]; - if (!isUnsignedInteger(pos_arg)) + if (!isUInt(pos_arg)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}", pos_arg->getName(), i, getName()); } diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp index f85b2596530..6dc68134502 100644 --- a/src/Functions/FunctionGenerateRandomStructure.cpp +++ 
b/src/Functions/FunctionGenerateRandomStructure.cpp @@ -365,7 +365,7 @@ DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & for (size_t i = 0; i != arguments.size(); ++i) { - if (!isUnsignedInteger(arguments[i]) && !arguments[i]->onlyNull()) + if (!isUInt(arguments[i]) && !arguments[i]->onlyNull()) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index bda5fce1ac8..d5f1f175a37 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2033,7 +2033,7 @@ static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) else if constexpr (std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) { - return (arguments.size() == 2 && isUnsignedInteger(arguments[1].type)) || arguments.size() == 3; + return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; } return false; diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index 00d989f388e..18b411e9839 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -60,7 +60,7 @@ public: if (!isString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); - if (!isUnsignedInteger(arguments[1])) + if (!isUInt(arguments[1])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName()); const DataTypeArray * array_type = checkAndGetDataType(arguments[2].get()); diff --git a/src/Functions/FunctionsStringHash.h b/src/Functions/FunctionsStringHash.h index b6ebc4b9410..d951e77395e 100644 --- a/src/Functions/FunctionsStringHash.h +++ b/src/Functions/FunctionsStringHash.h @@ -64,7 +64,7 @@ public: if (arguments.size() > 1) { - if (!isUnsignedInteger(arguments[1].type)) + if (!isUInt(arguments[1].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (shingle size) of function {} must be unsigned integer, got {}", getName(), arguments[1].type->getName()); @@ -85,7 +85,7 @@ public: "Function {} expect no more than two arguments (text, shingle size), got {}", getName(), arguments.size()); - if (!isUnsignedInteger(arguments[2].type)) + if (!isUInt(arguments[2].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument (num hashes) of function {} must be unsigned integer, got {}", getName(), arguments[2].type->getName()); diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index c9de29697bf..41b476ccc56 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -119,7 +119,7 @@ public: if (arguments.size() >= 3) { - if (!isUnsignedInteger(arguments[2])) + if (!isUInt(arguments[2])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index 908ca9fa30a..01164313788 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -35,7 +35,7 @@ public: { FunctionArgumentDescriptors args{ {"array", &isArray, nullptr, "Array"}, - {"samples", &isUnsignedInteger, isColumnConst, "const UInt*"}, + {"samples", &isUInt, isColumnConst, "const UInt*"}, }; 
validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index 804a243cda0..36a05f1cbf9 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -59,7 +59,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"days", &isNativeUnsignedInteger, nullptr, "UInt*"} + {"days", &isNativeUInt, nullptr, "UInt*"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/randomFixedString.cpp b/src/Functions/randomFixedString.cpp index 508fae3e824..914800386d5 100644 --- a/src/Functions/randomFixedString.cpp +++ b/src/Functions/randomFixedString.cpp @@ -41,7 +41,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isUnsignedInteger(arguments[0].type)) + if (!isUInt(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be unsigned integer", getName()); if (!arguments[0].column || !isColumnConst(*arguments[0].column)) diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 6d14f0f1380..7bee666c5dd 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -47,7 +47,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isUnsignedInteger(arguments[1].type)) + if (!isUInt(arguments[1].type)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be unsigned integer", getName()); if (!arguments[1].column) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 35ba49e4545..33b0e9f6039 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1147,7 +1147,7 @@ public: double p; if (isFloat(p_column.column->getDataType())) p = p_column.column->getFloat64(0); - else if (isUnsignedInteger(p_column.column->getDataType())) + else if (isUInt(p_column.column->getDataType())) p = p_column.column->getUInt(0); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be either constant Float64 or constant UInt", getName()); diff --git a/src/Functions/ztest.cpp b/src/Functions/ztest.cpp index 9ced926d239..55e1b59a897 100644 --- a/src/Functions/ztest.cpp +++ b/src/Functions/ztest.cpp @@ -57,7 +57,7 @@ public: { for (size_t i = 0; i < 4; ++i) { - if (!isUnsignedInteger(arguments[i].type)) + if (!isUInt(arguments[i].type)) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 9577f7ca7ff..6d6f4b87cef 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -226,7 +226,7 @@ FillingTransform::FillingTransform( throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Incompatible types of WITH FILL expression values with column type {}", type->getName()); - if (isUnsignedInteger(type) && + if (isUInt(type) && ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) || (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) { From 12b35b66fc5f28c91ff1f198e99d112f0428c043 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Nov 2023 
14:28:34 +0100 Subject: [PATCH 53/80] Update analyzer_integration_broken_tests.txt --- tests/analyzer_integration_broken_tests.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 7c87a41dae9..23f22209451 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -8,7 +8,6 @@ test_executable_table_function/test.py::test_executable_function_input_python test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed -test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node] test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster From 3ecce143fcbefd9056386130e20ead9de495db38 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Nov 2023 14:43:10 +0100 Subject: [PATCH 54/80] Restrict modes for the test --- tests/integration/test_storage_s3_queue/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index d659126f49a..1489855fc08 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -626,7 +626,8 @@ def test_multiple_tables_meta_mismatch(started_cluster): ) -@pytest.mark.parametrize("mode", AVAILABLE_MODES) +# TODO: Update the modes for this test to include "ordered" once PR #55795 is finished. +@pytest.mark.parametrize("mode", ["unordered"]) def test_multiple_tables_streaming_sync(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"multiple_tables_streaming_sync_{mode}" From 13599ff462d6ce620e265eb2a2a4ebece8e86065 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Nov 2023 15:31:54 +0100 Subject: [PATCH 55/80] review fixes --- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ src/Storages/StorageReplicatedMergeTree.cpp | 15 +++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c22bd82b69a..e8a0b290dc9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3904,6 +3904,8 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW /// It will add the empty part to the set of Outdated parts without making it Active (exactly what we need) transaction.rollback(&lock); new_data_part->remove_time.store(0, std::memory_order_relaxed); + /// Such parts are always local, they don't participate in replication, they don't have shared blobs. 
+ /// So we don't have locks for shared data in zk for them, and can just remove blobs (this avoids leaving garbage in S3) new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 216e4e303fe..765e3f34301 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1364,18 +1364,17 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); - ActiveDataPartSet empty_unexpected_parts_set(format_version); + ActiveDataPartSet set_of_empty_unexpected_parts(format_version); for (const auto & part : parts) { if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name)) continue; - empty_unexpected_parts_set.add(part->name); - const_cast(*part).remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; + set_of_empty_unexpected_parts.add(part->name); } - if (auto empty_count = empty_unexpected_parts_set.size()) - LOG_INFO(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): {}", - empty_count, fmt::join(empty_unexpected_parts_set.getParts(), ", ")); + if (auto empty_count = set_of_empty_unexpected_parts.size()) + LOG_WARNING(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): [{}]", + empty_count, fmt::join(set_of_empty_unexpected_parts.getParts(), ", ")); /** To check the adequacy, for the parts that are in the FS, but not in ZK, we will only consider not the most recent parts. * Because unexpected new parts usually arise only because they did not have time to enroll in ZK with a rough restart of the server. 
@@ -1403,10 +1402,10 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) continue; } - String covering_empty_part = empty_unexpected_parts_set.getContainingPart(part->name); + String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(part->name); if (!covering_empty_part.empty()) { - LOG_WARNING(log, "Unexpected part {} is covered by empty paty {}, assuming it has been dropped just before restart", + LOG_INFO(log, "Unexpected part {} is covered by empty part {}, assuming it has been dropped just before restart", part->name, covering_empty_part); covered_unexpected_parts.push_back(part->name); continue; From 1ee73e0bb86cac05d4e7c261aeff9cece64cc3ef Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Nov 2023 15:45:15 +0100 Subject: [PATCH 56/80] Update 02713_array_low_cardinality_string.sql --- .../queries/0_stateless/02713_array_low_cardinality_string.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02713_array_low_cardinality_string.sql b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql index c55d57f04e7..964e82da963 100644 --- a/tests/queries/0_stateless/02713_array_low_cardinality_string.sql +++ b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql @@ -18,6 +18,6 @@ WHERE database = currentDatabase() AND table = 'tab'; SELECT '---'; -EXPLAIN indexes = 1, description=0 SELECT * FROM tab WHERE has(foo, 'b'); +EXPLAIN indexes = 1, description = 0 SELECT * FROM tab WHERE has(foo, 'b'); DROP TABLE tab; From 53033d2347357dd874748447824c258d3b2729a9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 6 Nov 2023 16:12:48 +0100 Subject: [PATCH 57/80] Add a consistent digest and tests --- tests/ci/digest_helper.py | 11 ++- tests/ci/test_digest.py | 139 ++++++++++++++++++++++++++++++ tests/ci/tests/digests/12 | 1 + tests/ci/tests/digests/dir1/12 | 1 + tests/ci/tests/digests/dir2/12 | 1 + tests/ci/tests/digests/dir2/13 | 1 + tests/ci/tests/digests/dir3 | 1 + tests/ci/tests/digests/symlink-12 | 1 + 8 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tests/ci/test_digest.py create mode 100644 tests/ci/tests/digests/12 create mode 100644 tests/ci/tests/digests/dir1/12 create mode 100644 tests/ci/tests/digests/dir2/12 create mode 100644 tests/ci/tests/digests/dir2/13 create mode 120000 tests/ci/tests/digests/dir3 create mode 120000 tests/ci/tests/digests/symlink-12 diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index 69a62fa62b5..543de51e46b 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -46,7 +46,7 @@ def digest_path(path: Path, hash_object: Optional[HASH] = None) -> HASH: def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> HASH: """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths. - The order matters""" + The order is processed as given""" hash_object = hash_object or md5() for path in paths: if path.exists(): @@ -54,6 +54,15 @@ def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> H return hash_object +def digest_consistent_paths( + paths: Iterable[Path], hash_object: Optional[HASH] = None +) -> HASH: + """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths. 
+ The order doesn't matter, paths are converted to `absolute` and ordered before + calculation""" + return digest_paths(sorted(p.absolute() for p in paths), hash_object) + + def digest_script(path_str: str) -> HASH: """Accepts value of the __file__ executed script and calculates the md5 hash for it""" path = Path(path_str) diff --git a/tests/ci/test_digest.py b/tests/ci/test_digest.py new file mode 100644 index 00000000000..246a3226721 --- /dev/null +++ b/tests/ci/test_digest.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python + +import unittest +from hashlib import md5 +from pathlib import Path + +import digest_helper as dh + +_12 = b"12\n" +_13 = b"13\n" + + +# pylint:disable=protected-access +class TestDigests(unittest.TestCase): + tests_dir = Path("tests/digests") + broken_link = tests_dir / "broken-symlink" + empty_digest = "d41d8cd98f00b204e9800998ecf8427e" + + def test__digest_file(self): + hash_tested = md5() + with self.assertRaises( + AssertionError, msg="_digest_file shouldn't work with dirs" + ): + dh._digest_file(self.tests_dir, hash_tested) + with self.assertRaises( + AssertionError, msg="_digest_file shouldn't work with broken links" + ): + dh._digest_file(self.broken_link, hash_tested) + + # file with content '12\n' + hash_expected = md5() + hash_expected.update(_12) + dh._digest_file(self.tests_dir / "12", hash_tested) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + # symlink to '12\n' + hash_tested = md5() + dh._digest_file(self.tests_dir / "symlink-12", hash_tested) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + + def test__digest_directory(self): + hash_tested = md5() + with self.assertRaises( + AssertionError, msg="_digest_directory shouldn't work with files" + ): + dh._digest_directory(self.tests_dir / "12", hash_tested) + with self.assertRaises( + AssertionError, msg="_digest_directory shouldn't work with broken links" + ): + dh._digest_file(self.broken_link, hash_tested) + + # dir1 + hash_expected = md5() + hash_expected.update(_12) + dh._digest_directory(self.tests_dir / "dir1", hash_tested) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + + # dir2 contains 12 and 13 + hash_expected.update(_13) + hash_tested = md5() + dh._digest_directory(self.tests_dir / "dir2", hash_tested) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + + # dir3 is symlink to dir2 + hash_tested = md5() + dh._digest_directory(self.tests_dir / "dir3", hash_tested) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + + def test_digest_path(self): + # test broken link does nothing + self.assertEqual( + self.empty_digest, dh.digest_path(self.broken_link).hexdigest() + ) + # Test file works fine + hash_expected = md5() + hash_expected.update(_12) + self.assertEqual( + hash_expected.digest(), dh.digest_path(self.tests_dir / "12").digest() + ) + # Test directory works fine + hash_expected = md5() + hash_expected.update(_12) + self.assertEqual( + hash_expected.digest(), dh.digest_path(self.tests_dir / "dir1").digest() + ) + # Test existed hash is updated from symlink dir3 + hash_tested = hash_expected.copy() + dh.digest_path(self.tests_dir / "dir3", hash_tested) + hash_expected = md5() + hash_expected.update(_12 + _12 + _13) + self.assertEqual(hash_expected.digest(), hash_tested.digest()) + # Test the full content of the following structure + # tests/digests + # ├── 12 + # ├── dir1 + # │   └── 12 + # ├── dir2 + # │   ├── 12 + # │   └── 13 + # ├── dir3 -> dir2 + # └── symlink-12 -> 12 + hash_expected = md5() + 
hash_expected.update(_12 * 3 + (_13 + _12) * 2) + self.assertEqual( + hash_expected.digest(), dh.digest_path(self.tests_dir).digest() + ) + + def test_digest_paths(self): + # test paths order matters + hash_ordered = dh.digest_paths( + (self.tests_dir / d for d in ("dir1", "dir2", "dir3")) + ) + hash_reversed = dh.digest_paths( + (self.tests_dir / d for d in ("dir3", "dir2", "dir1")) + ) + hash_unordered = dh.digest_paths( + (self.tests_dir / d for d in ("dir3", "dir1", "dir2")) + ) + self.assertNotEqual(hash_ordered.digest(), hash_unordered.digest()) + self.assertNotEqual(hash_ordered.digest(), hash_reversed.digest()) + self.assertNotEqual(hash_unordered.digest(), hash_reversed.digest()) + + def test_digest_consistent_paths(self): + # test paths order does not matter + hash_ordered = dh.digest_consistent_paths( + (self.tests_dir / d for d in ("dir1", "dir2", "dir3")) + ) + hash_reversed = dh.digest_consistent_paths( + (self.tests_dir / d for d in ("dir3", "dir2", "dir1")) + ) + self.assertEqual(hash_ordered.digest(), hash_reversed.digest()) + + @classmethod + def setUpClass(cls): + # create a broken symlink + (TestDigests.broken_link).symlink_to("non-existent-link") + + @classmethod + def tearDownClass(cls): + (TestDigests.broken_link).unlink() diff --git a/tests/ci/tests/digests/12 b/tests/ci/tests/digests/12 new file mode 100644 index 00000000000..48082f72f08 --- /dev/null +++ b/tests/ci/tests/digests/12 @@ -0,0 +1 @@ +12 diff --git a/tests/ci/tests/digests/dir1/12 b/tests/ci/tests/digests/dir1/12 new file mode 100644 index 00000000000..48082f72f08 --- /dev/null +++ b/tests/ci/tests/digests/dir1/12 @@ -0,0 +1 @@ +12 diff --git a/tests/ci/tests/digests/dir2/12 b/tests/ci/tests/digests/dir2/12 new file mode 100644 index 00000000000..48082f72f08 --- /dev/null +++ b/tests/ci/tests/digests/dir2/12 @@ -0,0 +1 @@ +12 diff --git a/tests/ci/tests/digests/dir2/13 b/tests/ci/tests/digests/dir2/13 new file mode 100644 index 00000000000..b1bd38b62a0 --- /dev/null +++ b/tests/ci/tests/digests/dir2/13 @@ -0,0 +1 @@ +13 diff --git a/tests/ci/tests/digests/dir3 b/tests/ci/tests/digests/dir3 new file mode 120000 index 00000000000..1e039be9000 --- /dev/null +++ b/tests/ci/tests/digests/dir3 @@ -0,0 +1 @@ +dir2 \ No newline at end of file diff --git a/tests/ci/tests/digests/symlink-12 b/tests/ci/tests/digests/symlink-12 new file mode 120000 index 00000000000..3cacc0b93c9 --- /dev/null +++ b/tests/ci/tests/digests/symlink-12 @@ -0,0 +1 @@ +12 \ No newline at end of file From 93cb51bad5e8e92a9958dd329d3ad273cb45a992 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Nov 2023 19:13:26 +0100 Subject: [PATCH 58/80] Update stress.py --- tests/ci/stress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index ef54191620d..92a28a54f86 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -137,7 +137,7 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: # However, it obstructs checking for hung queries. 
logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") - call_with_retry("tail --pid=$(pidof gdb) -f /dev/null") + call_with_retry("timeout 50s tail --pid=$(pidof gdb) -f /dev/null || kill -9 $(pidof gdb) ||:", timeout=60) # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb call_with_retry( "kill -CONT $(cat /var/run/clickhouse-server/clickhouse-server.pid) && clickhouse client -q 'SELECT 1 FORMAT Null'" From 43f2b59625900f21c43866e1fed5734662145f24 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 6 Nov 2023 18:31:15 +0000 Subject: [PATCH 59/80] Automatic style fix --- tests/ci/stress.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 92a28a54f86..a6467ac7a6e 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -137,7 +137,10 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: # However, it obstructs checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") - call_with_retry("timeout 50s tail --pid=$(pidof gdb) -f /dev/null || kill -9 $(pidof gdb) ||:", timeout=60) + call_with_retry( + "timeout 50s tail --pid=$(pidof gdb) -f /dev/null || kill -9 $(pidof gdb) ||:", + timeout=60, + ) # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb call_with_retry( "kill -CONT $(cat /var/run/clickhouse-server/clickhouse-server.pid) && clickhouse client -q 'SELECT 1 FORMAT Null'" From c583b992ba4bbdb7cabfecf04130e3aa66ceb031 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Nov 2023 18:42:56 +0000 Subject: [PATCH 60/80] Update development documentation about data streams --- docs/en/development/architecture.md | 30 +++++++++++--- docs/en/development/style.md | 2 +- .../operations/system-tables/stack_trace.md | 40 +++++++++---------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index ba81b31b8ef..cfdd2bbcc41 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -67,22 +67,30 @@ Implementations of `ReadBuffer`/`WriteBuffer` are used for working with files an Read/WriteBuffers only deal with bytes. There are functions from `ReadHelpers` and `WriteHelpers` header files to help with formatting input/output. For example, there are helpers to write a number in decimal format. -Let’s look at what happens when you want to write a result set in `JSON` format to stdout. You have a result set ready to be fetched from `IBlockInputStream`. You create `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout. You create `JSONRowOutputStream`, initialized with that `WriteBuffer`, to write rows in `JSON` to stdout. You create `BlockOutputStreamFromRowOutputStream` on top of it, to represent it as `IBlockOutputStream`. Then you call `copyData` to transfer data from `IBlockInputStream` to `IBlockOutputStream`, and everything works. Internally, `JSONRowOutputStream` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`. +Let's examine what happens when you want to write a result set in `JSON` format to stdout. 
+You have a result set ready to be fetched from a pulling `QueryPipeline`. +First, you create a `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout. +Next, you connect the result from the query pipeline to `JSONRowOutputFormat`, which is initialized with that `WriteBuffer`, to write rows in `JSON` format to stdout. +This can be done via the `complete` method, which turns a pulling `QueryPipeline` into a completed `QueryPipeline`. +Internally, `JSONRowOutputFormat` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`. ## Tables {#tables} The `IStorage` interface represents tables. Different implementations of that interface are different table engines. Examples are `StorageMergeTree`, `StorageMemory`, and so on. Instances of these classes are just tables. -The key `IStorage` methods are `read` and `write`. There are also `alter`, `rename`, `drop`, and so on. The `read` method accepts the following arguments: the set of columns to read from a table, the `AST` query to consider, and the desired number of streams to return. It returns one or multiple `IBlockInputStream` objects and information about the stage of data processing that was completed inside a table engine during query execution. +The key methods in `IStorage` are `read` and `write`, along with others such as `alter`, `rename`, and `drop`. The `read` method accepts the following arguments: a set of columns to read from a table, the `AST` query to consider, and the desired number of streams. It returns a `Pipe`. -In most cases, the read method is only responsible for reading the specified columns from a table, not for any further data processing. All further data processing is done by the query interpreter and is outside the responsibility of `IStorage`. +In most cases, the read method is responsible only for reading the specified columns from a table, not for any further data processing. +All subsequent data processing is handled by another part of the pipeline, which falls outside the responsibility of `IStorage`. But there are notable exceptions: - The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read fewer data from a table. - Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data. -The table’s `read` method can return multiple `IBlockInputStream` objects to allow parallel data processing. These multiple block input streams can read from a table in parallel. Then you can wrap these streams with various transformations (such as expression evaluation or filtering) that can be calculated independently and create a `UnionBlockInputStream` on top of them, to read from multiple streams in parallel. +The table’s `read` method can return a `Pipe` consisting of multiple `Processors`. These `Processors` can read from a table in parallel. +Then, you can connect these processors with various other transformations (such as expression evaluation or filtering), which can be calculated independently. 
+And then, create a `QueryPipeline` on top of them, and execute it via `PipelineExecutor`. There are also `TableFunction`s. These are functions that return a temporary `IStorage` object to use in the `FROM` clause of a query. @@ -98,9 +106,19 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele ## Interpreters {#interpreters} -Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time. +Interpreters are responsible for creating the query execution pipeline from an AST. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, as well as the more sophisticated `InterpreterSelectQuery`. -`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query. +The query execution pipeline is a combination of processors that can consume and produce chunks (sets of columns with specific types). +A processor communicates via ports and can have multiple input ports and multiple output ports. +A more detailed description can be found in [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h). + +For example, the result of interpreting the `SELECT` query is a "pulling" `QueryPipeline` which has a special output port to read the result set from. +The result of the `INSERT` query is a "pushing" `QueryPipeline` with an input port to write data for insertion. +And the result of interpreting the `INSERT SELECT` query is a "completed" `QueryPipeline` that has no inputs or outputs but copies data from `SELECT` to `INSERT` simultaneously. + +`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are performed. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow for modular transformations of the query. + +To address current problems that exist in interpreters, a new `InterpreterSelectQueryAnalyzer` is being developed. It is a new version of `InterpreterSelectQuery` that does not use `ExpressionAnalyzer` and introduces an additional abstraction level between `AST` and `QueryPipeline` called `QueryTree`. It is not production-ready yet, but it can be tested with the `allow_experimental_analyzer` flag. 
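To make the processor/pipeline model described above a little more concrete, here is a deliberately simplified, self-contained C++ sketch of a pull-driven chain of chunk-producing stages. It only illustrates the general idea of stages exchanging small batches ("chunks") of data; it does not use ClickHouse's actual `IProcessor`, `QueryPipeline`, or `PipelineExecutor` classes, whose port-based, scheduler-driven interface is documented in `src/Processors/IProcessor.h`. All names in the sketch (`NumbersSource`, `SquareTransform`) are invented for illustration.

```cpp
#include <iostream>
#include <optional>
#include <vector>

/// A "chunk" here is just a batch of integers; real chunks are sets of typed columns.
using Chunk = std::vector<int>;

/// Toy source: produces a few fixed-size chunks, then signals end-of-data.
struct NumbersSource
{
    int next = 0;
    std::optional<Chunk> pull()
    {
        if (next >= 9)
            return std::nullopt;
        Chunk chunk;
        for (int i = 0; i < 3; ++i)
            chunk.push_back(next++);
        return chunk;
    }
};

/// Toy transform: pulls a chunk from the previous stage and applies an expression to it.
struct SquareTransform
{
    NumbersSource & input;
    std::optional<Chunk> pull()
    {
        auto chunk = input.pull();
        if (!chunk)
            return std::nullopt;
        for (auto & value : *chunk)
            value *= value;
        return chunk;
    }
};

int main()
{
    /// A "pulling" pipeline: the consumer at the end drives execution by asking for chunks.
    NumbersSource source;
    SquareTransform transform{source};
    while (auto chunk = transform.pull())
    {
        for (int value : *chunk)
            std::cout << value << ' ';
        std::cout << '\n';
    }
    return 0;
}
```

In the real engine, execution is not driven by a simple pull loop like this: `PipelineExecutor` schedules a graph of processors connected through input and output ports, which is what allows pulling, pushing, and completed pipelines to be expressed uniformly.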
## Functions {#functions} diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 5b03468623d..0b71a669638 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -345,7 +345,7 @@ struct ExtractDomain **7.** For abstract classes (interfaces) you can add the `I` prefix. ``` cpp -class IBlockInputStream +class IProcessor ``` **8.** If you use a variable locally, you can use the short name. diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index 52ee7088597..90f1f47e52f 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -35,27 +35,25 @@ WITH arrayMap(x -> demangle(addressToSymbol(x)), trace) AS all SELECT thread_nam ``` text Row 1: ────── -thread_name: clickhouse-serv - -thread_id: 686 -query_id: 1a11f70b-626d-47c1-b948-f9c7b206395d -res: sigqueue -DB::StorageSystemStackTrace::fillData(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::Context const&, DB::SelectQueryInfo const&) const -DB::IStorageSystemOneBlock::read(std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, DB::SelectQueryInfo const&, DB::Context const&, DB::QueryProcessingStage::Enum, unsigned long, unsigned int) -DB::InterpreterSelectQuery::executeFetchColumns(DB::QueryProcessingStage::Enum, DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) -DB::InterpreterSelectQuery::executeImpl(DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::optional) -DB::InterpreterSelectQuery::execute() -DB::InterpreterSelectWithUnionQuery::execute() -DB::executeQueryImpl(char const*, char const*, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*) -DB::executeQuery(std::__1::basic_string, std::__1::allocator > const&, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool) -DB::TCPHandler::runImpl() -DB::TCPHandler::run() -Poco::Net::TCPServerConnection::start() -Poco::Net::TCPServerDispatcher::run() -Poco::PooledThread::run() -Poco::ThreadImpl::runnableEntry(void*) -start_thread -__clone +thread_name: QueryPipelineEx +thread_id: 743490 +query_id: dc55a564-febb-4e37-95bb-090ef182c6f1 +res: memcpy +large_ralloc +arena_ralloc +do_rallocx +Allocator::realloc(void*, unsigned long, unsigned long, unsigned long) +HashTable, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>::resize(unsigned long, unsigned long) +void DB::Aggregator::executeImplBatch, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>>(DB::AggregationMethodOneNumber, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>&, DB::AggregationMethodOneNumber, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>::State&, DB::Arena*, unsigned long, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, bool, char*) const +DB::Aggregator::executeImpl(DB::AggregatedDataVariants&, unsigned long, unsigned long, std::__1::vector>&, DB::Aggregator::AggregateFunctionInstruction*, bool, bool, char*) const +DB::Aggregator::executeOnBlock(std::__1::vector::immutable_ptr, std::__1::allocator::immutable_ptr>>, unsigned long, unsigned long, DB::AggregatedDataVariants&, std::__1::vector>&, std::__1::vector>, std::__1::allocator>>>&, bool&) 
const +DB::AggregatingTransform::work() +DB::ExecutionThreadContext::executeTask() +DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic*) +void std::__1::__function::__policy_invoker::__call_impl>(std::__1::__function::__policy_storage const*) +ThreadPoolImpl>::worker(std::__1::__list_iterator, void*>) +void std::__1::__function::__policy_invoker::__call_impl::ThreadFromGlobalPoolImpl>::scheduleImpl(std::__1::function, Priority, std::__1::optional, bool)::'lambda0'()>(void&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) +void* std::__1::__thread_proxy[abi:v15000]>, void ThreadPoolImpl::scheduleImpl(std::__1::function, Priority, std::__1::optional, bool)::'lambda0'()>>(void*) ``` Getting filenames and line numbers in ClickHouse source code: From 3236f269b598bbefdfba37640e87960d78a50fa6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Nov 2023 18:43:17 +0000 Subject: [PATCH 61/80] Update rus development documentation about data streams --- docs/ru/development/architecture.md | 37 ++++++++--------- docs/ru/development/style.md | 2 +- .../operations/system-tables/stack_trace.md | 40 +++++++++---------- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index 35741570702..b2e851a78cd 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -49,21 +49,9 @@ ClickHouse — полноценная столбцовая СУБД. Данны Блоки создаются для всех обработанных фрагментов данных. Напоминаем, что одни и те же типы вычислений, имена столбцов и типы переиспользуются в разных блоках и только данные колонок изменяются. Лучше разделить данные и заголовок блока потому, что в блоках маленького размера мы имеем большой оверхэд по временным строкам при копировании умных указателей (`shared_ptrs`) и имен столбцов. -## Потоки блоков (Block Streams) {#block-streams} +## Процессоры -Потоки блоков обрабатывают данные. Мы используем потоки блоков для чтения данных, трансформации или записи данных куда-либо. `IBlockInputStream` предоставляет метод `read` для получения следующего блока, пока это возможно, и метод `write`, чтобы продвигать (push) блок куда-либо. - -Потоки отвечают за: - -1. Чтение и запись в таблицу. Таблица лишь возвращает поток для чтения или записи блоков. -2. Реализацию форматов данных. Например, при выводе данных в терминал в формате `Pretty`, вы создаете выходной поток блоков, который форматирует поступающие в него блоки. -3. Трансформацию данных. Допустим, у вас есть `IBlockInputStream` и вы хотите создать отфильтрованный поток. Вы создаете `FilterBlockInputStream` и инициализируете его вашим потоком. Затем вы тянете (pull) блоки из `FilterBlockInputStream`, а он тянет блоки исходного потока, фильтрует их и возвращает отфильтрованные блоки вам. Таким образом построены конвейеры выполнения запросов. - -Имеются и более сложные трансформации. Например, когда вы тянете блоки из `AggregatingBlockInputStream`, он считывает все данные из своего источника, агрегирует их, и возвращает поток агрегированных данных вам. Другой пример: конструктор `UnionBlockInputStream` принимает множество источников входных данных и число потоков. Такой `Stream` работает в несколько потоков и читает данные источников параллельно. - -> Потоки блоков используют «втягивающий» (pull) подход к управлению потоком выполнения: когда вы вытягиваете блок из первого потока, он, следовательно, вытягивает необходимые блоки из вложенных потоков, так и работает весь конвейер выполнения. 
Ни «pull» ни «push» не имеют явного преимущества, потому что поток управления неявный, и это ограничивает в реализации различных функций, таких как одновременное выполнение нескольких запросов (слияние нескольких конвейеров вместе). Это ограничение можно преодолеть с помощью сопрограмм (coroutines) или просто запуском дополнительных потоков, которые ждут друг друга. У нас может быть больше возможностей, если мы сделаем поток управления явным: если мы локализуем логику для передачи данных из одной расчетной единицы в другую вне этих расчетных единиц. Читайте эту [статью](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) для углубленного изучения. - -Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвейере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов. +Смотрите описание в файле [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h) исходного кода. ## Форматы {#formats} @@ -81,13 +69,16 @@ ClickHouse — полноценная столбцовая СУБД. Данны Буферы чтения-записи имеют дело только с байтами. В заголовочных файлах `ReadHelpers` и `WriteHelpers` объявлены некоторые функции, чтобы помочь с форматированием ввода-вывода. Например, есть помощники для записи числа в десятичном формате. -Давайте посмотрим, что происходит, когда вы хотите вывести результат в `JSON` формате в стандартный вывод (stdout). У вас есть результирующий набор данных, готовый к извлечению из `IBlockInputStream`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputStream`, инициализируете с этим `WriteBuffer`'ом, чтобы записать строки `JSON` в stdout. Кроме того вы создаете `BlockOutputStreamFromRowOutputStream`, реализуя `IBlockOutputStream`. Затем вызывается `copyData` для передачи данных из `IBlockInputStream` в `IBlockOutputStream` и все работает. Внутренний `JSONRowOutputStream` будет писать в формате `JSON` различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`. +Давайте посмотрим, что происходит, когда вы хотите вывести результат в `JSON` формате в стандартный вывод (stdout). У вас есть результирующий набор данных, готовый к извлечению из `QueryPipeline`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputFormat`, инициализируете с этим `WriteBuffer`'ом, чтобы записать строки `JSON` в stdout. +Чтобы соеденить выход `QueryPipeline` с форматом, можно использовать метод `complete`, который превращает `QueryPipeline` в завершенный `QueryPipeline`. 
+Внутренний `JSONRowOutputStream` будет писать в формате `JSON` различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`. ## Таблицы {#tables} Интерфейс `IStorage` служит для отображения таблицы. Различные движки таблиц являются реализациями этого интерфейса. Примеры `StorageMergeTree`, `StorageMemory` и так далее. Экземпляры этих классов являются просто таблицами. -Ключевые методы `IStorage` это `read` и `write`. Есть и другие варианты — `alter`, `rename`, `drop` и так далее. Метод `read` принимает следующие аргументы: набор столбцов для чтения из таблицы, `AST` запрос и желаемое количество потоков для вывода. Он возвращает один или несколько объектов `IBlockInputStream` и информацию о стадии обработки данных, которая была завершена внутри табличного движка во время выполнения запроса. +Ключевые методы `IStorage` это `read` и `write`. Есть и другие варианты — `alter`, `rename`, `drop` и так далее. +Метод `read` принимает следующие аргументы: набор столбцов для чтения из таблицы, `AST` запрос и желаемое количество потоков для вывода и возвращает `Pipe`. В большинстве случаев метод read отвечает только за чтение указанных столбцов из таблицы, а не за дальнейшую обработку данных. Вся дальнейшая обработка данных осуществляется интерпретатором запросов и не входит в сферу ответственности `IStorage`. @@ -96,7 +87,9 @@ ClickHouse — полноценная столбцовая СУБД. Данны - AST-запрос, передающийся в метод `read`, может использоваться движком таблицы для получения информации о возможности использования индекса и считывания меньшего количества данных из таблицы. - Иногда движок таблиц может сам обрабатывать данные до определенного этапа. Например, `StorageDistributed` можно отправить запрос на удаленные серверы, попросить их обработать данные до этапа, когда данные с разных удаленных серверов могут быть объединены, и вернуть эти предварительно обработанные данные. Затем интерпретатор запросов завершает обработку данных. -Метод `read` может возвращать несколько объектов `IBlockInputStream`, позволяя осуществлять параллельную обработку данных. Эти несколько блочных входных потоков могут считываться из таблицы параллельно. Затем вы можете обернуть эти потоки различными преобразованиями (такими как вычисление выражений или фильтрация), которые могут быть вычислены независимо, и создать `UnionBlockInputStream` поверх них, чтобы читать из нескольких потоков параллельно. +Метод `read` может возвращать `Pipe`, состоящий из нескольких процессоров. Каждый их этих процессоров может читать данные параллельно. +Затем, вы можете соеденить эти просессоры с другими преобразованиями (такими как вычисление выражений или фильтрация), которые могут быть вычислены независимо. +Далее, создан `QueryPipeline` поверх них, можно выполнить пайплайн с помощью `PipelineExecutor`. Есть и другие варианты. Например, `TableFunction` возвращает временный объект `IStorage`, который можно подставить во `FROM`. @@ -112,10 +105,18 @@ ClickHouse — полноценная столбцовая СУБД. Данны ## Интерпретаторы {#interpreters} -Интерпретаторы отвечают за создание конвейера выполнения запроса из `AST`. Есть простые интерпретаторы, такие как `InterpreterExistsQuery` и `InterpreterDropQuery` или более сложный `InterpreterSelectQuery`. 
Конвейер выполнения запроса представляет собой комбинацию входных и выходных потоков блоков. Например, результатом интерпретации `SELECT` запроса является `IBlockInputStream` для чтения результирующего набора данных; результат интерпретации `INSERT` запроса — это `IBlockOutputStream`, для записи данных, предназначенных для вставки; результат интерпретации `INSERT SELECT` запроса — это `IBlockInputStream`, который возвращает пустой результирующий набор при первом чтении, но копирует данные из `SELECT` к `INSERT`. +Интерпретаторы отвечают за создание конвейера выполнения запроса из `AST`. Есть простые интерпретаторы, такие как `InterpreterExistsQuery` и `InterpreterDropQuery` или более сложный `InterpreterSelectQuery`. + +Конвейер выполнения запроса представляет собой комбинацию процессоров, которые могут принимать на вход и также возвращать чанки (набор колонок с их типами) +Процессоры обмениваются данными через порты и могут иметь несколько входных и выходных портов. +Более подробное описание можно найти в файле [src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h). + +Например, результатом интерпретации `SELECT` запроса является `QueryPipeline`, который имеет специальный выходной порт для чтения результирующего набора данных. Результатом интерпретации `INSERT` запроса является `QueryPipeline` с входным портом для записи данных для вставки. Результатом интерпретации `INSERT SELECT` запроса является завершенный `QueryPipeline`, который не имеет входов или выходов, но копирует данные из `SELECT` в `INSERT` одновременно. `InterpreterSelectQuery` использует `ExpressionAnalyzer` и `ExpressionActions` механизмы для анализа запросов и преобразований. Именно здесь выполняется большинство оптимизаций запросов на основе правил. `ExpressionAnalyzer` написан довольно грязно и должен быть переписан: различные преобразования запросов и оптимизации должны быть извлечены в отдельные классы, чтобы позволить модульные преобразования или запросы. +Для решения текущих проблем, существующих в интерпретаторах, разрабатывается новый `InterpreterSelectQueryAnalyzer`. Это новая версия `InterpreterSelectQuery`, которая не использует `ExpressionAnalyzer` и вводит дополнительный уровень абстракции между `AST` и `QueryPipeline`, называемый `QueryTree`. Он еще не готов к использованию в продакшене, но его можно протестировать с помощью флага `allow_experimental_analyzer`. + ## Функции {#functions} Существуют обычные функции и агрегатные функции. Агрегатные функции смотрите в следующем разделе. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 6098dc9c13b..49c4aade4e9 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -345,7 +345,7 @@ struct ExtractDomain **7.** Для абстрактных классов (интерфейсов) можно добавить в начало имени букву `I`. ``` cpp -class IBlockInputStream +class IProcessor ``` **8.** Если переменная используется достаточно локально, то можно использовать короткое имя. 
diff --git a/docs/ru/operations/system-tables/stack_trace.md b/docs/ru/operations/system-tables/stack_trace.md index 817f66d1af0..bf9dbd55f80 100644 --- a/docs/ru/operations/system-tables/stack_trace.md +++ b/docs/ru/operations/system-tables/stack_trace.md @@ -31,27 +31,25 @@ WITH arrayMap(x -> demangle(addressToSymbol(x)), trace) AS all SELECT thread_nam ``` text Row 1: ────── -thread_name: clickhouse-serv - -thread_id: 686 -query_id: 1a11f70b-626d-47c1-b948-f9c7b206395d -res: sigqueue -DB::StorageSystemStackTrace::fillData(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::Context const&, DB::SelectQueryInfo const&) const -DB::IStorageSystemOneBlock::read(std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, DB::SelectQueryInfo const&, DB::Context const&, DB::QueryProcessingStage::Enum, unsigned long, unsigned int) -DB::InterpreterSelectQuery::executeFetchColumns(DB::QueryProcessingStage::Enum, DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) -DB::InterpreterSelectQuery::executeImpl(DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::optional) -DB::InterpreterSelectQuery::execute() -DB::InterpreterSelectWithUnionQuery::execute() -DB::executeQueryImpl(char const*, char const*, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*) -DB::executeQuery(std::__1::basic_string, std::__1::allocator > const&, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool) -DB::TCPHandler::runImpl() -DB::TCPHandler::run() -Poco::Net::TCPServerConnection::start() -Poco::Net::TCPServerDispatcher::run() -Poco::PooledThread::run() -Poco::ThreadImpl::runnableEntry(void*) -start_thread -__clone +thread_name: QueryPipelineEx +thread_id: 743490 +query_id: dc55a564-febb-4e37-95bb-090ef182c6f1 +res: memcpy +large_ralloc +arena_ralloc +do_rallocx +Allocator::realloc(void*, unsigned long, unsigned long, unsigned long) +HashTable, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>::resize(unsigned long, unsigned long) +void DB::Aggregator::executeImplBatch, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>>(DB::AggregationMethodOneNumber, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>&, DB::AggregationMethodOneNumber, HashTableNoState, PairNoInit>, HashCRC32, HashTableGrowerWithPrecalculation<8ul>, Allocator>, true, false>::State&, DB::Arena*, unsigned long, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, bool, char*) const +DB::Aggregator::executeImpl(DB::AggregatedDataVariants&, unsigned long, unsigned long, std::__1::vector>&, DB::Aggregator::AggregateFunctionInstruction*, bool, bool, char*) const +DB::Aggregator::executeOnBlock(std::__1::vector::immutable_ptr, std::__1::allocator::immutable_ptr>>, unsigned long, unsigned long, DB::AggregatedDataVariants&, std::__1::vector>&, std::__1::vector>, std::__1::allocator>>>&, bool&) const +DB::AggregatingTransform::work() +DB::ExecutionThreadContext::executeTask() +DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic*) +void std::__1::__function::__policy_invoker::__call_impl>(std::__1::__function::__policy_storage const*) +ThreadPoolImpl>::worker(std::__1::__list_iterator, void*>) +void std::__1::__function::__policy_invoker::__call_impl::ThreadFromGlobalPoolImpl>::scheduleImpl(std::__1::function, 
Priority, std::__1::optional, bool)::'lambda0'()>(void&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*) +void* std::__1::__thread_proxy[abi:v15000]>, void ThreadPoolImpl::scheduleImpl(std::__1::function, Priority, std::__1::optional, bool)::'lambda0'()>>(void*) ``` Получение имен файлов и номеров строк в исходном коде ClickHouse: From 101975705e8df8e4c9cbd449f3bde248b7f8a240 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Nov 2023 19:59:43 +0100 Subject: [PATCH 62/80] Update stress.py --- tests/ci/stress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index a6467ac7a6e..ae918363df7 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -362,7 +362,7 @@ def main(): ) hung_check_log = args.output_folder / "hung_check.log" # type: Path tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE) - res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) + res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT, timeout=600) if tee.stdin is not None: tee.stdin.close() if res != 0 and have_long_running_queries and not suppress_hung_check: From a091f0323ee29deb7d5cbb58c5e452f0d4229104 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Nov 2023 19:16:49 +0000 Subject: [PATCH 63/80] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 63775d22b64..9bf5a097144 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 543 +personal_ws-1.1 en 2633 AArch ACLs ALTERs @@ -355,6 +355,7 @@ IOUringPendingEvents IOWriterThreads IOWriterThreadsActive IPTrie +IProcessor IPv Identifiant Incrementing From 941ece31e1ce298427f55ef777083b6e056c8a65 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Nov 2023 19:25:07 +0000 Subject: [PATCH 64/80] Option to check particular file with utils/check-style/check-doc-aspell --- utils/check-style/check-doc-aspell | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/utils/check-style/check-doc-aspell b/utils/check-style/check-doc-aspell index 952dbd5b507..b5a3958e6cf 100755 --- a/utils/check-style/check-doc-aspell +++ b/utils/check-style/check-doc-aspell @@ -6,9 +6,9 @@ shopt -s globstar # Perform spell checking on the docs if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then - echo "Usage $0 [--help|-h] [-i]" + echo "Usage $0 [--help|-h] [-i [filename]]" echo " --help|-h: print this help" - echo " -i: interactive mode" + echo " -i: interactive mode. If filename is specified, check only this file, otherwise check all files" exit 0 fi @@ -18,14 +18,21 @@ CHECK_LANG=en ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}" -STATUS=0 -for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do - if [[ ${1:-} == "-i" ]]; then +if [[ ${1:-} == "-i" ]]; then + if [[ ! 
-z ${2:-} ]]; then + FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/${2} + else + FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md + fi + for fname in ${FILES_TO_CHECK}; do echo "Checking $fname" aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname" - continue - fi + done + exit +fi +STATUS=0 +for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do errors=$(cat "$fname" \ | aspell list \ -W 3 \ From 5b5c8db7eddaaf2fb1d34b4978362a4f63fbe633 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 28 Aug 2023 16:00:29 +0000 Subject: [PATCH 65/80] Support isNotDistinct in JOIN ON in analyzer --- src/Planner/PlannerJoinTree.cpp | 7 ++++--- src/Planner/PlannerJoins.cpp | 23 +++++++++++++++++++++-- src/Planner/PlannerJoins.h | 11 ++++++++++- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6ffef7cda3c..b1515c69712 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1146,12 +1146,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes(); size_t join_clause_key_nodes_size = join_clause_left_key_nodes.size(); - assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size()); + chassert(join_clause_key_nodes_size == join_clause_right_key_nodes.size()); for (size_t i = 0; i < join_clause_key_nodes_size; ++i) { - table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name); - table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name); + table_join_clause.addKey(join_clause_left_key_nodes[i]->result_name, + join_clause_right_key_nodes[i]->result_name, + join_clause.isNullsafeCompareKey(i)); } const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes(); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 2f7c08b25ba..5f53c8e1fce 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -191,7 +191,7 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag, auto asof_inequality = getASOFJoinInequality(function_name); bool is_asof_join_inequality = join_node.getStrictness() == JoinStrictness::Asof && asof_inequality != ASOFJoinInequality::None; - if (function_name == "equals" || is_asof_join_inequality) + if (function_name == "equals" || function_name == "isNotDistinctFrom" || is_asof_join_inequality) { const auto * left_child = join_expressions_actions_node->children.at(0); const auto * right_child = join_expressions_actions_node->children.at(1); @@ -253,7 +253,8 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag, } else { - join_clause.addKey(left_key, right_key); + bool null_safe_comparison = function_name == "isNotDistinctFrom"; + join_clause.addKey(left_key, right_key, null_safe_comparison); } } else @@ -474,6 +475,24 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & right_key_node = &join_expression_actions->addCast(*right_key_node, common_type, {}); } + if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable()) + { + /** + * In case of null-safe comparison (a IS NOT DISTICT FROM b), + * we need to wrap keys with a non-nullable type. 
+ * The type `tuple` can be used for this purpose, + * because value tuple(NULL) is not NULL itself (moreover it has type Tuple(Nullable(T) which is not Nullable). + * Thus, join algorithm will match keys with values tuple(NULL). + * Example: + * SELECT * FROM t1 JOIN t2 ON t1.a <=> t2.b + * This will be semantically transformed to: + * SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b) + */ + auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext()); + left_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {}); + right_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {}); + } + join_expression_actions->addOrReplaceInOutputs(*left_key_node); join_expression_actions->addOrReplaceInOutputs(*right_key_node); diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h index c61bce932e0..94f32e7ad51 100644 --- a/src/Planner/PlannerJoins.h +++ b/src/Planner/PlannerJoins.h @@ -53,10 +53,12 @@ class JoinClause { public: /// Add keys - void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node) + void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, bool null_safe_comparison = false) { left_key_nodes.emplace_back(left_key_node); right_key_nodes.emplace_back(right_key_node); + if (null_safe_comparison) + nullsafe_compare_key_indexes.emplace(left_key_nodes.size() - 1); } void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality) @@ -97,6 +99,11 @@ public: return right_key_nodes; } + bool isNullsafeCompareKey(size_t idx) const + { + return nullsafe_compare_key_indexes.contains(idx); + } + /// Returns true if JOIN clause has ASOF conditions, false otherwise bool hasASOF() const { @@ -147,6 +154,8 @@ private: ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes; ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes; + + std::unordered_set nullsafe_compare_key_indexes; }; using JoinClauses = std::vector; From cd2e2cab835ca2356ab9a538219d6db2b276ddc4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 1 Sep 2023 12:38:07 +0000 Subject: [PATCH 66/80] update 02861_join_on_nullsafe_compare --- tests/analyzer_tech_debt.txt | 1 - ...2861_join_on_nullsafe_compare.reference.j2 | 31 +++++++++++++++++++ .../02861_join_on_nullsafe_compare.sql.j2 | 23 +++++++++----- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 06fd4cc80c3..ca23acb7680 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -66,4 +66,3 @@ 01940_custom_tld_sharding_key 02815_range_dict_no_direct_join 02845_threads_count_in_distributed_queries -02861_join_on_nullsafe_compare diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 index d97d6c2b314..c0e35d7ae87 100644 --- a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 +++ b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 @@ -647,6 +647,37 @@ join_algorithm = default, join_use_nulls = 0, t1 JOIN t4 19 19 19 19 \N 20 \N 0 -- +\N 0 2 2 +\N 0 \N 4 +\N 0 6 6 +\N 0 \N 8 +\N 0 10 10 +\N 0 \N 12 +\N 0 14 14 +\N 0 \N 16 +\N 0 18 18 +\N 0 \N 20 +1 1 1 1 +\N 2 \N 0 +3 3 3 3 +\N 4 \N 0 +5 5 5 5 +\N 6 \N 0 +7 7 7 7 +\N 8 \N 0 +9 9 9 9 +\N 10 \N 0 +11 11 11 11 +\N 12 \N 0 +13 13 
13 13 +\N 14 \N 0 +15 15 15 15 +\N 16 \N 0 +17 17 17 17 +\N 18 \N 0 +19 19 19 19 +\N 20 \N 0 +-- 1 42 420 1 1 43 430 1 \N 42 420 2 \N 43 430 4 \N 42 420 2 \N 43 430 8 diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 index 64960d2b2e5..2ae18d3b8a9 100644 --- a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 +++ b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 @@ -30,28 +30,28 @@ SELECT 'join_algorithm = {{ join_algorithm }}, join_use_nulls = {{ join_use_null SELECT '--'; SELECT {{ t1 }}.a, {{ t1 }}.val, {{ t2 }}.a, {{ t2 }}.val FROM {{ t1 }} FULL JOIN {{ t2 }} -ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) +ON {{ t1 }}.a <=> {{ t2 }}.a ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST ; SELECT '--'; SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} -ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST ; SELECT '--'; SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} -ON {{ t1 }}.a == {{ t2 }}.a AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ON {{ t1 }}.a == {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST ; SELECT '--'; SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} -ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND {{ t1 }}.b == {{ t2 }}.b +ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b == {{ t2 }}.b ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST ; @@ -62,7 +62,14 @@ SELECT '--'; SET join_use_nulls = 0; SET join_algorithm = 'hash'; SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2 -ON isNotDistinctFrom(t1.a, t2.a) AND t1.b < 2 OR t1.a == t2.a +ON t1.a <=> t2.a AND t1.b < 2 OR t1.a == t2.a +ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST +; + +SELECT '--'; + +SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2 +ON t1.a IS NOT DISTINCT FROM t2.a AND t1.b < 2 OR t1.a == t2.a ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST ; @@ -76,7 +83,7 @@ SET join_use_nulls = 1; SELECT * FROM (SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val FROM t1) t1 JOIN (SELECT a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, val FROM t2) t2 -ON isNotDistinctFrom(t1.a, t2.a) +ON t1.a <=> t2.a ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST LIMIT 10; @@ -85,7 +92,7 @@ SELECT '--'; SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val, t2.a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, t2.val FROM (SELECT a, val, 111 as `__wrapNullsafe(a)_0` FROM t1) t1 JOIN (SELECT a, val, 111 as `__wrapNullsafe(t2.a)_0` FROM t2) t2 -ON isNotDistinctFrom(t1.a, t2.a) +ON t1.a <=> t2.a ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST LIMIT 10; @@ -99,3 +106,5 @@ SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a, t2.a, t2.b); -- { serverErro SELECT isNotDistinctFrom(a) from t1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT isNotDistinctFrom(a, b) from t1; -- { serverError NOT_IMPLEMENTED } +SELECT a <=> b from t1; -- { serverError NOT_IMPLEMENTED } +SELECT a IS NOT DISTINCT FROM b from t1; -- { serverError NOT_IMPLEMENTED } From b195cba9435626255445d51ffa2629c571611a7d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 6 Nov 2023 20:11:55 +0000 Subject: [PATCH 67/80] Fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt 
b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 18f8c86fb23..439a615208c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2286,6 +2286,7 @@ stochasticlogisticregression storages storig stringJaccardIndex +stringJaccardIndexUTF stringToH stripelog strtod From a66fa872489e9e3afb34e0b389e38372488d19e7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Nov 2023 21:51:05 +0100 Subject: [PATCH 68/80] Add obsolete setting back (#56382) --- src/Core/Settings.h | 1 + tests/queries/0_stateless/02888_obsolete_settings.reference | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 40ce779b063..aa5c8569be6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -837,6 +837,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_geo_types, true) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \ \ MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \ diff --git a/tests/queries/0_stateless/02888_obsolete_settings.reference b/tests/queries/0_stateless/02888_obsolete_settings.reference index 39a395ad373..6ee5216cd73 100644 --- a/tests/queries/0_stateless/02888_obsolete_settings.reference +++ b/tests/queries/0_stateless/02888_obsolete_settings.reference @@ -4,6 +4,7 @@ allow_experimental_bigint_types allow_experimental_database_atomic allow_experimental_geo_types allow_experimental_map_type +allow_experimental_query_cache allow_experimental_window_functions async_insert_cleanup_timeout_ms async_insert_stale_timeout_ms From 816890c0691cfe941f5bb2711de81b4e2ac50056 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 7 Nov 2023 00:36:06 +0300 Subject: [PATCH 69/80] test on cluster positional_arguments --- ...est_positional_arguments_on_cluster.reference | 2 ++ ...2006_test_positional_arguments_on_cluster.sql | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.reference create mode 100644 tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql diff --git a/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.reference b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.reference new file mode 100644 index 00000000000..05dd41748d1 --- /dev/null +++ b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.reference @@ -0,0 +1,2 @@ +d Date +f UInt64 diff --git a/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql new file mode 100644 index 00000000000..b1de6be9df5 --- /dev/null +++ b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql @@ -0,0 +1,16 @@ +-- Tags: no-ordinary-database, no-replicated-database, distributed, zookeeper + +DROP TABLE IF EXISTS t02006 on cluster test_shard_localhost format Null; +DROP TABLE IF EXISTS m02006 on cluster test_shard_localhost format Null; + +CREATE TABLE t02006 on cluster test_shard_localhost (d Date) +ENGINE = MergeTree ORDER BY d +format Null; + +CREATE MATERIALIZED VIEW m02006 on cluster test_shard_localhost +Engine = MergeTree ORDER BY tuple() AS SELECT d, 0 AS i FROM t02006 GROUP BY d, 
i +format Null; + +ALTER TABLE t02006 on cluster test_shard_localhost ADD COLUMN IF NOT EXISTS f UInt64 format Null; + +DESC t02006; From e47e50e30df0cb375b533a44cb888e15320f7c7d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 Nov 2023 02:49:11 +0300 Subject: [PATCH 70/80] Revert "Add function `arrayRandomSample()`" --- .../functions/array-functions.md | 74 ----- src/Functions/array/arrayRandomSample.cpp | 118 -------- ...new_functions_must_be_documented.reference | 1 - .../02874_array_random_sample.reference | 37 --- .../0_stateless/02874_array_random_sample.sh | 258 ------------------ .../aspell-ignore/en/aspell-dict.txt | 1 - 6 files changed, 489 deletions(-) delete mode 100644 src/Functions/array/arrayRandomSample.cpp delete mode 100644 tests/queries/0_stateless/02874_array_random_sample.reference delete mode 100755 tests/queries/0_stateless/02874_array_random_sample.sh diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 40bfb65e4e8..02e5d1e5ae2 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2172,80 +2172,6 @@ Result: └─────────────────────┘ ``` - -## arrayRandomSample - -Function `arrayRandomSample` returns a subset with `samples`-many random elements of an input array. If `samples` exceeds the size of the input array, the sample size is limited to the size of the array. In this case, all elements of the input array are returned, but the order is not guaranteed. The function can handle both flat arrays and nested arrays. - -**Syntax** - -```sql -arrayRandomSample(arr, samples) -``` - -**Arguments** - -- `arr` — The input array from which to sample elements. This may be flat or nested arrays. -- `samples` — An unsigned integer specifying the number of elements to include in the random sample. - -**Returned Value** - -- An array containing a random sample of elements from the input array. - -**Examples** - -Query: - -```sql -SELECT arrayRandomSample(['apple', 'banana', 'cherry', 'date'], 2) as res; -``` - -Result: -``` -┌─res────────────────┐ -│ ['banana','apple'] │ -└────────────────────┘ -``` - -Query: - -```sql -SELECT arrayRandomSample([[1, 2], [3, 4], [5, 6]], 2) as res; -``` - -Result: -``` -┌─res───────────┐ -│ [[3,4],[5,6]] │ -└───────────────┘ -``` - -Query: - -```sql -SELECT arrayRandomSample([1, 2, 3, 4, 5], 0) as res; -``` - -Result: -``` -┌─res─┐ -│ [] │ -└─────┘ -``` - -Query: - -```sql -SELECT arrayRandomSample([1, 2, 3], 5) as res; -``` - -Result: -``` -┌─res─────┐ -│ [3,1,2] │ -└─────────┘ -``` - ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). 
diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp deleted file mode 100644 index 908ca9fa30a..00000000000 --- a/src/Functions/array/arrayRandomSample.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "Columns/ColumnsNumber.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -/// arrayRandomSample(arr, k) - Returns k random elements from the input array -class FunctionArrayRandomSample : public IFunction -{ -public: - static constexpr auto name = "arrayRandomSample"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 2; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - bool useDefaultImplementationForConstants() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"samples", &isUnsignedInteger, isColumnConst, "const UInt*"}, - }; - validateFunctionArgumentTypes(*this, arguments, args); - - // Return an array with the same nested type as the input array - const DataTypePtr & array_type = arguments[0].type; - const DataTypeArray * array_data_type = checkAndGetDataType(array_type.get()); - - // Get the nested data type of the array - const DataTypePtr & nested_type = array_data_type->getNestedType(); - - return std::make_shared(nested_type); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const ColumnArray * column_array = checkAndGetColumn(arguments[0].column.get()); - if (!column_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument must be an array"); - - const IColumn * col_samples = arguments[1].column.get(); - if (!col_samples) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second argument is empty or null, type = {}", arguments[1].type->getName()); - - UInt64 samples; - try - { - samples = col_samples->getUInt(0); - } - catch (...) 
- { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Failed to fetch UInt64 from the second argument column, type = {}", - arguments[1].type->getName()); - } - - std::random_device rd; - std::mt19937 gen(rd()); - - auto nested_column = column_array->getDataPtr()->cloneEmpty(); - auto offsets_column = ColumnUInt64::create(); - - auto res_data = ColumnArray::create(std::move(nested_column), std::move(offsets_column)); - - const auto & input_offsets = column_array->getOffsets(); - auto & res_offsets = res_data->getOffsets(); - res_offsets.resize(input_rows_count); - - UInt64 cur_samples; - size_t current_offset = 0; - - for (size_t row = 0; row < input_rows_count; row++) - { - size_t row_size = input_offsets[row] - current_offset; - - std::vector indices(row_size); - std::iota(indices.begin(), indices.end(), 0); - std::shuffle(indices.begin(), indices.end(), gen); - - cur_samples = std::min(samples, static_cast(row_size)); - - for (UInt64 j = 0; j < cur_samples; j++) - { - size_t source_index = indices[j]; - res_data->getData().insertFrom(column_array->getData(), source_index); - } - - res_offsets[row] = current_offset + cur_samples; - current_offset += cur_samples; - } - - return res_data; - } -}; - -REGISTER_FUNCTION(ArrayRandomSample) -{ - factory.registerFunction(); -} - -} diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 379eea4dbbb..589ea366030 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -126,7 +126,6 @@ arrayPopFront arrayProduct arrayPushBack arrayPushFront -arrayRandomSample arrayReduce arrayReduceInRanges arrayResize diff --git a/tests/queries/0_stateless/02874_array_random_sample.reference b/tests/queries/0_stateless/02874_array_random_sample.reference deleted file mode 100644 index 7dab23a37b4..00000000000 --- a/tests/queries/0_stateless/02874_array_random_sample.reference +++ /dev/null @@ -1,37 +0,0 @@ -Running iteration: 1 -Integer Test: Passed -String Test: Passed -Nested Array Test: Passed -Higher Sample Number Test: Passed -Multi-row Test with scalar k: Passed -Running iteration: 2 -Integer Test: Passed -String Test: Passed -Nested Array Test: Passed -Higher Sample Number Test: Passed -Multi-row Test with scalar k: Passed -Running iteration: 3 -Integer Test: Passed -String Test: Passed -Nested Array Test: Passed -Higher Sample Number Test: Passed -Multi-row Test with scalar k: Passed -Running iteration: 4 -Integer Test: Passed -String Test: Passed -Nested Array Test: Passed -Higher Sample Number Test: Passed -Multi-row Test with scalar k: Passed -Running iteration: 5 -Integer Test: Passed -String Test: Passed -Nested Array Test: Passed -Higher Sample Number Test: Passed -Multi-row Test with scalar k: Passed -Integer Test with K=0: Passed -Empty Array with K > 0 Test: Passed -Non-Unsigned-Integer K Test (Negative Integer): Passed -Non-Unsigned-Integer K Test (String): Passed -Non-Unsigned-Integer K Test (Floating-Point): Passed -Total tests: 30 -Passed tests: 30 diff --git a/tests/queries/0_stateless/02874_array_random_sample.sh b/tests/queries/0_stateless/02874_array_random_sample.sh deleted file mode 100755 index fe136d6d5d2..00000000000 --- a/tests/queries/0_stateless/02874_array_random_sample.sh +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env bash - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && 
pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -# Initialize variables -total_tests=0 -passed_tests=0 - - -# Test Function for Integer Arrays -run_integer_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([1,2,3], 2)") - mapfile -t sorted_result < <(echo "$query_result" | tr -d '[]' | tr ',' '\n' | sort -n) - declare -A expected_outcomes - expected_outcomes["1 2"]=1 - expected_outcomes["1 3"]=1 - expected_outcomes["2 3"]=1 - expected_outcomes["2 1"]=1 - expected_outcomes["3 1"]=1 - expected_outcomes["3 2"]=1 - - sorted_result_str=$(echo "${sorted_result[*]}" | tr ' ' '\n' | sort -n | tr '\n' ' ' | sed 's/ $//') - if [[ -n "${expected_outcomes[$sorted_result_str]}" ]]; then - echo "Integer Test: Passed" - ((passed_tests++)) - else - echo "Integer Test: Failed" - echo "Output: $query_result" - fi - ((total_tests++)) -} - -# Test Function for String Arrays -run_string_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample(['a','b','c'], 2)") - mapfile -t sorted_result < <(echo "$query_result" | tr -d "[]'" | tr ',' '\n' | sort) - declare -A expected_outcomes - expected_outcomes["a b"]=1 - expected_outcomes["a c"]=1 - expected_outcomes["b c"]=1 - expected_outcomes["b a"]=1 - expected_outcomes["c a"]=1 - expected_outcomes["c b"]=1 - - sorted_result_str=$(echo "${sorted_result[*]}" | tr ' ' '\n' | sort | tr '\n' ' ' | sed 's/ $//') - if [[ -n "${expected_outcomes[$sorted_result_str]}" ]]; then - echo "String Test: Passed" - ((passed_tests++)) - else - echo "String Test: Failed" - echo "Output: $query_result" - fi - ((total_tests++)) -} - -# Test Function for Nested Arrays -run_nested_array_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([[7,2],[3,4],[7,6]], 2)") - # Convert to a space-separated string for easy sorting. - converted_result=$(echo "$query_result" | tr -d '[]' | tr ',' ' ') - - # Sort the string. 
- sorted_result_str=$(echo "$converted_result" | tr ' ' '\n' | xargs -n2 | sort | tr '\n' ' ' | sed 's/ $//') - - # Define all possible expected outcomes, sorted - declare -A expected_outcomes - expected_outcomes["7 2 3 4"]=1 - expected_outcomes["7 2 7 6"]=1 - expected_outcomes["3 4 7 6"]=1 - expected_outcomes["3 4 7 2"]=1 - expected_outcomes["7 6 7 2"]=1 - expected_outcomes["7 6 3 4"]=1 - - if [[ -n "${expected_outcomes[$sorted_result_str]}" ]]; then - echo "Nested Array Test: Passed" - ((passed_tests++)) - else - echo "Nested Array Test: Failed" - echo "Output: $query_result" - echo "Processed Output: ${sorted_result_str}" - fi - ((total_tests++)) -} - - -# Test Function for K > array.size -run_higher_k_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([1,2,3], 5)") - mapfile -t sorted_result < <(echo "$query_result" | tr -d '[]' | tr ',' '\n' | sort -n) - sorted_original=("1" "2" "3") - - are_arrays_equal=true - for i in "${!sorted_result[@]}"; do - if [[ "${sorted_result[$i]}" != "${sorted_original[$i]}" ]]; then - are_arrays_equal=false - break - fi - done - - if $are_arrays_equal; then - echo "Higher Sample Number Test: Passed" - ((passed_tests++)) - else - echo "Higher Sample Number Test: Failed" - echo "Output: $query_result" - fi - ((total_tests++)) -} - -# Test Function for Integer Arrays with samples = 0 -run_integer_with_samples_0_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([1,2,3], 0)") - mapfile -t sorted_result < <(echo "$query_result" | tr -d '[]' | tr ',' '\n' | sort -n) - - # An empty array should produce an empty string after transformations - declare -A expected_outcomes - expected_outcomes["EMPTY_ARRAY"]=1 - - # Prepare the result string for comparison - sorted_result_str=$(echo "${sorted_result[*]}" | tr ' ' '\n' | sort -n | tr '\n' ' ' | sed 's/ $//') - - # Use "EMPTY_ARRAY" as a placeholder for an empty array - [[ -z "$sorted_result_str" ]] && sorted_result_str="EMPTY_ARRAY" - - # Compare - if [[ -n "${expected_outcomes[$sorted_result_str]}" ]]; then - echo "Integer Test with K=0: Passed" - ((passed_tests++)) - else - echo "Integer Test with K=0: Failed" - echo "Output: $query_result" - fi - ((total_tests++)) -} - -# Test Function for Empty Array with K > 0 -run_empty_array_with_k_test() { - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([], 5)") - - if [[ "$query_result" == "[]" ]]; then - echo "Empty Array with K > 0 Test: Passed" - ((passed_tests++)) - else { - echo "Empty Array with K > 0 Test: Failed" - echo "Output: $query_result" - } - fi - ((total_tests++)) -} - -# Test Function for Non-Unsigned-Integer K -run_non_unsigned_integer_k_test() { - # Test with negative integer - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([1, 2, 3], -5)" 2>&1) - if [[ "$query_result" == *"ILLEGAL_TYPE_OF_ARGUMENT"* ]]; then - echo "Non-Unsigned-Integer K Test (Negative Integer): Passed" - ((passed_tests++)) - else { - echo "Non-Unsigned-Integer K Test (Negative Integer): Failed" - echo "Output: $query_result" - } - fi - ((total_tests++)) - - # Test with string - query_result=$(clickhouse-client -q "SELECT arrayRandomSample([1, 2, 3], 'a')" 2>&1) - if [[ "$query_result" == *"ILLEGAL_TYPE_OF_ARGUMENT"* ]]; then - echo "Non-Unsigned-Integer K Test (String): Passed" - ((passed_tests++)) - else { - echo "Non-Unsigned-Integer K Test (String): Failed" - echo "Output: $query_result" - } - fi - ((total_tests++)) - - # Test with floating-point number - query_result=$(clickhouse-client -q "SELECT 
arrayRandomSample([1, 2, 3], 1.5)" 2>&1) - if [[ "$query_result" == *"ILLEGAL_TYPE_OF_ARGUMENT"* ]]; then - echo "Non-Unsigned-Integer K Test (Floating-Point): Passed" - ((passed_tests++)) - else { - echo "Non-Unsigned-Integer K Test (Floating-Point): Failed" - echo "Output: $query_result" - } - fi - ((total_tests++)) -} - -# Function to run a multi-row test with scalar 'k' -run_multi_row_scalar_k_test() { - # Create a table. Use a random database name as tests potentially run in parallel. - db=`tr -dc A-Za-z0-9 Date: Tue, 7 Nov 2023 09:45:46 +0800 Subject: [PATCH 71/80] ci fix --- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9b9374ff05a..19750906fdb 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -835,7 +835,7 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & /// Check for single '\r' not followed by '\n' /// We should not stop in this case. - if (*buf.position() == '\r') + if (*buf.position() == '\r' && !settings.allow_cr_end_of_line) { ++buf.position(); if (!buf.eof() && *buf.position() != '\n') From 361472b29e06817f2fd045f71b27ba42f55c909b Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Mon, 6 Nov 2023 14:39:58 +0800 Subject: [PATCH 72/80] Add 4-letter command for yielding/resigning leadership --- src/Coordination/CoordinationSettings.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 8 +++++++ src/Coordination/FourLetterCommand.h | 13 +++++++++++ src/Coordination/KeeperDispatcher.h | 6 +++++ src/Coordination/KeeperServer.cpp | 5 ++++ src/Coordination/KeeperServer.h | 2 ++ tests/integration/helpers/keeper_utils.py | 5 ++++ .../test_keeper_four_word_command/test.py | 23 +++++++++++++++++++ 8 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 1f27823182a..cdd691f6a79 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 1bec17f2050..80ba5569131 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -172,6 +172,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr feature_flags_command = std::make_shared(keeper_dispatcher); factory.registerCommand(feature_flags_command); + FourLetterCommandPtr yield_leader_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(yield_leader_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -579,4 +582,9 @@ String FeatureFlagsCommand::run() return ret.str(); } +String YieldLeaderCommand::run() +{ + return keeper_dispatcher.yieldLeader() ? "Sent yield leadership request to leader." 
: "Failed to send yield leadership request to leader."; +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 0520da06b6d..239f53844d2 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -415,4 +415,17 @@ struct FeatureFlagsCommand : public IFourLetterCommand ~FeatureFlagsCommand() override = default; }; +/// Yield to be leader. +struct YieldLeaderCommand : public IFourLetterCommand +{ + explicit YieldLeaderCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "ydld"; } + String run() override; + ~YieldLeaderCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 39941f55d5e..ff16b4d5e9d 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -237,6 +237,12 @@ public: return server->requestLeader(); } + /// Yield to be leader. + bool yieldLeader() + { + return server->yieldLeader(); + } + void recalculateStorageStats() { return server->recalculateStorageStats(); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index fd3db6f1032..bebe951c18f 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1087,6 +1087,11 @@ bool KeeperServer::requestLeader() return isLeader() || raft_instance->request_leadership(); } +bool KeeperServer::yieldLeader() +{ + return isLeader() && raft_instance->yield_leadership(); +} + void KeeperServer::recalculateStorageStats() { state_machine->recalculateStorageStats(); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index ed58418fe5f..5b0e7a9eccc 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -141,6 +141,8 @@ public: bool requestLeader(); + bool yieldLeader(); + void recalculateStorageStats(); }; diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 79d498b909f..83d0f2969b7 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -248,6 +248,11 @@ def is_leader(cluster, node, port=9181): return "Mode: leader" in stat +def is_follower(cluster, node, port=9181): + stat = send_4lw_cmd(cluster, node, "stat", port) + return "Mode: follower" in stat + + def get_leader(cluster, nodes): for node in nodes: if is_leader(cluster, node): diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 5419d2334c7..f07b2c036bd 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -718,3 +718,26 @@ def test_cmd_clrs(started_cluster): finally: destroy_zk_client(zk) + + +def test_cmd_ydld(started_cluster): + wait_nodes() + for node in [node1, node3]: + data = keeper_utils.send_4lw_cmd(cluster, node, cmd="ydld") + assert data == "Sent yield leadership request to leader." + + print("ydld output -------------------------------------") + print(data) + + if keeper_utils.is_leader(cluster, node): + # wait for it to yield leadership + retry = 0 + while keeper_utils.is_leader(cluster, node) and retry < 30: + time.sleep(1) + retry += 1 + if retry == 30: + print( + node.name + + " did not yield leadership after 30s, maybe there is something wrong." 
+ ) + assert keeper_utils.is_follower(cluster, node) From 74b53218b87b622d1b160e60d02d47a62e5dcf26 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Mon, 6 Nov 2023 23:04:10 +0800 Subject: [PATCH 73/80] Rename yield_leader with yield_leadship and its variants --- src/Coordination/FourLetterCommand.cpp | 8 ++++---- src/Coordination/FourLetterCommand.h | 8 ++++---- src/Coordination/KeeperDispatcher.h | 6 +++--- src/Coordination/KeeperServer.cpp | 2 +- src/Coordination/KeeperServer.h | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 80ba5569131..85d18e36feb 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -172,8 +172,8 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr feature_flags_command = std::make_shared(keeper_dispatcher); factory.registerCommand(feature_flags_command); - FourLetterCommandPtr yield_leader_command = std::make_shared(keeper_dispatcher); - factory.registerCommand(yield_leader_command); + FourLetterCommandPtr yield_leadership_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(yield_leadership_command); factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); @@ -582,9 +582,9 @@ String FeatureFlagsCommand::run() return ret.str(); } -String YieldLeaderCommand::run() +String YieldLeadershipCommand::run() { - return keeper_dispatcher.yieldLeader() ? "Sent yield leadership request to leader." : "Failed to send yield leadership request to leader."; + return keeper_dispatcher.yieldLeadership() ? "Sent yield leadership request to leader." : "Failed to send yield leadership request to leader."; } } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 239f53844d2..bb3c616e080 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -415,17 +415,17 @@ struct FeatureFlagsCommand : public IFourLetterCommand ~FeatureFlagsCommand() override = default; }; -/// Yield to be leader. -struct YieldLeaderCommand : public IFourLetterCommand +/// Yield leadership and become follower. +struct YieldLeadershipCommand : public IFourLetterCommand { - explicit YieldLeaderCommand(KeeperDispatcher & keeper_dispatcher_) + explicit YieldLeadershipCommand(KeeperDispatcher & keeper_dispatcher_) : IFourLetterCommand(keeper_dispatcher_) { } String name() override { return "ydld"; } String run() override; - ~YieldLeaderCommand() override = default; + ~YieldLeadershipCommand() override = default; }; } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index ff16b4d5e9d..43cedd5aa2c 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -237,10 +237,10 @@ public: return server->requestLeader(); } - /// Yield to be leader. - bool yieldLeader() + /// Yield leadership and become follower. 
+ bool yieldLeadership() { - return server->yieldLeader(); + return server->yieldLeadership(); } void recalculateStorageStats() diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index bebe951c18f..63e270e1ca0 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1087,7 +1087,7 @@ bool KeeperServer::requestLeader() return isLeader() || raft_instance->request_leadership(); } -bool KeeperServer::yieldLeader() +bool KeeperServer::yieldLeadership() { return isLeader() && raft_instance->yield_leadership(); } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 5b0e7a9eccc..63bf77dc43e 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -141,7 +141,7 @@ public: bool requestLeader(); - bool yieldLeader(); + bool yieldLeadership(); void recalculateStorageStats(); }; From 6c5b60b12dcf24d3c4ad9ad11f86e87d38f0baa7 Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Tue, 7 Nov 2023 00:27:44 +0800 Subject: [PATCH 74/80] Empty commit From 0aaaf320b6bc6794488c6f22b8e75ed5c579dd9c Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Tue, 7 Nov 2023 09:53:41 +0800 Subject: [PATCH 75/80] Update document of keeper-client to let CI run --- docs/en/operations/utilities/clickhouse-keeper-client.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index d06d88598a7..d6e11fb9613 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -12,6 +12,7 @@ A client application to interact with clickhouse-keeper by its native protocol. - `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. - `-p N`, `--port=N` — Server port. Default value: 9181 +- `-c FILE_PATH`, `--config-file=FILE_PATH` — Set path of config file to get the connection string. Default value: `config.xml`. - `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s. - `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s. - `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s. 
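A quick way to exercise the new `ydld` four-letter command from the patches above is the same plain-TCP path the integration test takes. The sketch below is only illustrative and assumes a Keeper instance listening on the default client port 9181 on localhost with `nc` available; on the leader the command triggers a leadership hand-off, while on a follower nothing happens.

    # ask the node to yield leadership (does nothing on a follower)
    echo ydld | nc localhost 9181

    # the former leader should eventually report "Mode: follower"
    echo stat | nc localhost 9181 | grep Mode

This mirrors what test_cmd_ydld does via keeper_utils.send_4lw_cmd, just without the test harness.
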
From 201a7b8d5fc4b36b787c100ce251a14725bc613c Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Tue, 7 Nov 2023 14:06:55 +0800 Subject: [PATCH 76/80] fix --- src/Coordination/KeeperServer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 63e270e1ca0..ef6a88cd141 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1089,7 +1089,8 @@ bool KeeperServer::requestLeader() bool KeeperServer::yieldLeadership() { - return isLeader() && raft_instance->yield_leadership(); + if (isLeader()) + raft_instance->yield_leadership(); } void KeeperServer::recalculateStorageStats() From 4e8ff997c0ea8991ab705acff39a43daaf09f04b Mon Sep 17 00:00:00 2001 From: Pradeep Chhetri Date: Tue, 7 Nov 2023 14:20:40 +0800 Subject: [PATCH 77/80] fix --- src/Coordination/FourLetterCommand.cpp | 3 ++- src/Coordination/KeeperDispatcher.h | 2 +- src/Coordination/KeeperServer.cpp | 2 +- src/Coordination/KeeperServer.h | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 85d18e36feb..be2c5ebd071 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -584,7 +584,8 @@ String FeatureFlagsCommand::run() String YieldLeadershipCommand::run() { - return keeper_dispatcher.yieldLeadership() ? "Sent yield leadership request to leader." : "Failed to send yield leadership request to leader."; + keeper_dispatcher.yieldLeadership(); + return "Sent yield leadership request to leader."; } } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 43cedd5aa2c..6483de7bd19 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -238,7 +238,7 @@ public: } /// Yield leadership and become follower. 
- bool yieldLeadership() + void yieldLeadership() { return server->yieldLeadership(); } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index ef6a88cd141..d1a9eeca61c 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1087,7 +1087,7 @@ bool KeeperServer::requestLeader() return isLeader() || raft_instance->request_leadership(); } -bool KeeperServer::yieldLeadership() +void KeeperServer::yieldLeadership() { if (isLeader()) raft_instance->yield_leadership(); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 63bf77dc43e..fcba4a2fffd 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -141,7 +141,7 @@ public: bool requestLeader(); - bool yieldLeadership(); + void yieldLeadership(); void recalculateStorageStats(); }; From dbbf6bb2c38f29aad80e7d35be46b693a6ae3ce5 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 7 Nov 2023 02:24:53 -0400 Subject: [PATCH 78/80] Update 02006_test_positional_arguments_on_cluster.sql --- .../0_stateless/02006_test_positional_arguments_on_cluster.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql index b1de6be9df5..1d8a4e4f8d7 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments_on_cluster.sql @@ -14,3 +14,6 @@ format Null; ALTER TABLE t02006 on cluster test_shard_localhost ADD COLUMN IF NOT EXISTS f UInt64 format Null; DESC t02006; + +DROP TABLE IF EXISTS t02006 on cluster test_shard_localhost format Null; +DROP TABLE IF EXISTS m02006 on cluster test_shard_localhost format Null; From 0555f8a68c768cbcb67d1dcdc64af5534b866922 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Nov 2023 12:01:52 +0100 Subject: [PATCH 79/80] Fix crash in case of adding a column with type Object(JSON) (#56307) --- src/Storages/AlterCommands.cpp | 21 ++++++-- .../01825_type_json_add_column.sql.j2 | 3 +- ...910_object-json-crash-add-column.reference | 0 .../02910_object-json-crash-add-column.sql | 49 +++++++++++++++++++ 4 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02910_object-json-crash-add-column.reference create mode 100644 tests/queries/0_stateless/02910_object-json-crash-add-column.sql diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index c6fa17583b5..1f0dcb625f9 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int DUPLICATE_COLUMN; extern const int NOT_IMPLEMENTED; + extern const int SUPPORT_IS_DISABLED; } namespace @@ -1083,6 +1084,13 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type have to be specified for column {} to add", backQuote(column_name)); + /// FIXME: Adding a new column of type Object(JSON) is broken. + /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). + /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value). + /// So we don't allow to do it for now. 
+ if (command.data_type->hasDynamicSubcolumns()) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs"); + if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " "this column name is reserved for lightweight delete feature", backQuote(column_name)); @@ -1145,17 +1153,22 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } } - /// The change of data type to/from Object is broken, so disable it for now + /// FIXME: Modifying the column to/from Object(JSON) is broken. + /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). + /// But after ALTER TABLE MODIFY COLUMN we need to fill existing rows with something (exactly the default value) or calculate the common type for it. + /// So we don't allow to do it for now. if (command.data_type) { const GetColumnsOptions options(GetColumnsOptions::AllPhysical); const auto old_data_type = all_columns.getColumn(options, column_name).type; - if (command.data_type->getName().contains("Object") - || old_data_type->getName().contains("Object")) + bool new_type_has_object = command.data_type->hasDynamicSubcolumns(); + bool old_type_has_object = old_data_type->hasDynamicSubcolumns(); + + if (new_type_has_object || old_type_has_object) throw Exception( ErrorCodes::BAD_ARGUMENTS, - "The change of data type {} of column {} to {} is not allowed", + "The change of data type {} of column {} to {} is not allowed. It has known bugs", old_data_type->getName(), backQuote(column_name), command.data_type->getName()); } diff --git a/tests/queries/0_stateless/01825_type_json_add_column.sql.j2 b/tests/queries/0_stateless/01825_type_json_add_column.sql.j2 index 87c76c042a6..d787ba9b163 100644 --- a/tests/queries/0_stateless/01825_type_json_add_column.sql.j2 +++ b/tests/queries/0_stateless/01825_type_json_add_column.sql.j2 @@ -1,4 +1,5 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, disabled +-- Disabled, because ClickHouse server may crash. 
https://github.com/ClickHouse/ClickHouse/pull/56307 {% for storage in ["MergeTree", "ReplicatedMergeTree('/clickhouse/tables/{database}/test_01825_add_column/', 'r1')"] -%} diff --git a/tests/queries/0_stateless/02910_object-json-crash-add-column.reference b/tests/queries/0_stateless/02910_object-json-crash-add-column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql new file mode 100644 index 00000000000..b2d64be1676 --- /dev/null +++ b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS test02910; + +CREATE TABLE test02910 +( + i Int8, + jString String +) ENGINE = MergeTree +ORDER BY i; + +INSERT INTO test02910 (i, jString) SELECT 1, '{"a":"123"}'; + +ALTER TABLE test02910 ADD COLUMN j2 Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910 ADD COLUMN j2 JSON default jString; -- { serverError SUPPORT_IS_DISABLED } + +-- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server. +-- SELECT * FROM test02910; + +DROP TABLE IF EXISTS test02910_second; + +CREATE TABLE test02910_second +( + `Id1` String, + `Id2` String, + `timestamp` DateTime64(6), + `tags` Array(String), +) +ENGINE = MergeTree +PRIMARY KEY (Id1, Id2) +ORDER BY (Id1, Id2, timestamp) +SETTINGS index_granularity = 8192, index_granularity_bytes = 0; + +INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', [] FROM numbers(10); +INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['a'] FROM numbers(10); +INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['b'] FROM numbers(10); +INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['c', 'd'] FROM numbers(10); +INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', [] FROM numbers(10); + +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE test02910_second ADD COLUMN `tags_json` JSON; -- { serverError SUPPORT_IS_DISABLED } + +-- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server. 
+-- SELECT * FROM test02910; + +DROP TABLE IF EXISTS test02910; +DROP TABLE IF EXISTS test02910_second; From 706f2dcf54a1a894be72d8d1805926aea1a4a448 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 7 Nov 2023 12:55:57 +0100 Subject: [PATCH 80/80] Add a metric for suspicious parts in ZooKeeper (#56395) --- src/Common/ProfileEvents.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d497d1ca814..018bf3dccf1 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -101,6 +101,7 @@ M(ReplicatedPartChecks, "Number of times we had to perform advanced search for a data part on replicas or to clarify the need of an existing data part.") \ M(ReplicatedPartChecksFailed, "Number of times the advanced search for a data part on replicas did not give result or when unexpected part has been found and moved away.") \ M(ReplicatedDataLoss, "Number of times a data part that we wanted doesn't exist on any replica (even on replicas that are offline right now). That data parts are definitely lost. This is normal due to asynchronous replication (if quorum inserts were not enabled), when the replica on which the data part was written was failed and when it became online after fail it doesn't contain that data part.") \ + M(ReplicatedCoveredPartsInZooKeeperOnStart, "For debugging purposes. Number of parts in ZooKeeper that have a covering part, but doesn't exist on disk. Checked on server start.") \ \ M(InsertedRows, "Number of rows INSERTed to all tables.") \ M(InsertedBytes, "Number of bytes (uncompressed; for columns as they stored in memory) INSERTed to all tables.") \ diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 33d9970e1ae..c96b376f8b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -133,6 +133,7 @@ namespace ProfileEvents extern const Event CreatedLogEntryForMutation; extern const Event NotCreatedLogEntryForMutation; extern const Event ReplicaPartialShutdown; + extern const Event ReplicatedCoveredPartsInZooKeeperOnStart; } namespace CurrentMetrics @@ -1319,6 +1320,7 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart( { LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " "It may cause false-positive 'part is lost forever' messages", part_name, covering_part); + ProfileEvents::increment(ProfileEvents::ReplicatedCoveredPartsInZooKeeperOnStart); chassert(false); } }
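
As a rough illustration of how the counter from the last patch surfaces at runtime: ProfileEvents counters are exposed through the `system.events` table, so after a server restart it could be checked along the lines of the sketch below. This is a hypothetical usage example, not part of the patch; the row typically appears only once the counter is non-zero, i.e. when covered-but-missing parts were actually found during startup.

    # hypothetical check for the new counter; assumes a running server and clickhouse-client on the same host
    clickhouse-client -q "
        SELECT event, value, description
        FROM system.events
        WHERE event = 'ReplicatedCoveredPartsInZooKeeperOnStart'"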