From 0d46e7555b066298603b5e0cd4dc122e74863ebf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 19:08:33 +0100 Subject: [PATCH 001/363] Fix unexpected behavior with FORMAT and SETTINGS parsing --- src/Parsers/ParserQueryWithOutput.cpp | 80 ++++++++++++++++++--------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 7a627ae5f6a..340abf27c31 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -150,37 +150,65 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } + /// These two sections are allowed in an arbitrary order. ParserKeyword s_format("FORMAT"); - - if (s_format.ignore(pos, expected)) - { - ParserIdentifier format_p; - - if (!format_p.parse(pos, query_with_output.format, expected)) - return false; - setIdentifierSpecial(query_with_output.format); - - query_with_output.children.push_back(query_with_output.format); - } - - // SETTINGS key1 = value1, key2 = value2, ... ParserKeyword s_settings("SETTINGS"); - if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) - return false; - query_with_output.children.push_back(query_with_output.settings_ast); - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) + /** Why: let's take the following example: + * SELECT 1 UNION ALL SELECT 2 FORMAT TSV + * Each subquery can be put in parentheses and have its own settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV + * And the whole query can have settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f + * A single query with output is parsed in the same way as the UNION ALL chain: + * SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f + * So while these forms have a slightly different meaning, they both exist: + * SELECT 1 SETTINGS a=b FORMAT TSV + * SELECT 1 FORMAT TSV SETTINGS e=f + * And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. + * But while this work: + * (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f + * This does not work automatically, unless we explicitly allow different orders: + * (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV + * Inevitably, we also allow this: + * SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV + * ^^^^^^^^^^^^^^^^^^^^^ + * Because this part is consumed into ASTSelectWithUnionQuery + * and the rest into ASTQueryWithOutput. + */ + + for (size_t i = 0; i < 2; ++i) + { + if (!query_with_output.format && s_format.ignore(pos, expected)) { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); + ParserIdentifier format_p; + + if (!format_p.parse(pos, query_with_output.format, expected)) + return false; + setIdentifierSpecial(query_with_output.format); + + query_with_output.children.push_back(query_with_output.format); } + else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) + { + // SETTINGS key1 = value1, key2 = value2, ... 
+ ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) + return false; + query_with_output.children.push_back(query_with_output.settings_ast); + + // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) + // Pass them manually, to apply in InterpreterSelectQuery::initSettings() + if (query->as()) + { + auto settings = query_with_output.settings_ast->clone(); + assert_cast(settings.get())->print_in_format = false; + QueryWithOutputSettingsPushDownVisitor::Data data{settings}; + QueryWithOutputSettingsPushDownVisitor(data).visit(query); + } + } + else + break; } node = std::move(query); From 3fb45ff1762867cbeb53e31cd4492dda5cf8dbb7 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 19:17:56 +0200 Subject: [PATCH 002/363] Add setting and implementation --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 44 ++++++++++++++++++- .../0_stateless/03150_infer_type_variant.sql | 4 ++ 5 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..0c0614550e5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,6 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 43ccee173f0..e027c693094 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,6 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; + format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d5fedf99adb..6cac41bd63e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,6 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; + bool infer_variant_from_multitype_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 02c0aa6dd77..fc1a26f9b2f 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -307,6 +308,22 @@ namespace type_indexes.erase(TypeIndex::UInt64); } + /// if setting input_format_json_infer_variant_from_multitype_array is true + /// and nested types are not equal then we convert to type variant. + void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + auto variant_type = std::make_shared(data_types); + /// replace separate types with a single variant type + data_types.clear(); + type_indexes.clear(); + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + + // push it back again + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + } + /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -649,6 +666,12 @@ namespace /// Check settings specific for JSON formats. 
+ if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -677,6 +700,12 @@ namespace if constexpr (!is_json) return; + if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -822,7 +851,6 @@ namespace if (checkIfTypesAreEqual(nested_types_copy)) return std::make_shared(nested_types_copy.back()); - return std::make_shared(nested_types); } else @@ -1482,6 +1510,20 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) return nested_type ? std::make_shared(nested_type) : nullptr; } + if (which.isVariant()) + { + const auto * variant_type = assert_cast(type.get()); + DataTypes nested_types; + for (const auto & nested_type: variant_type->getVariants()) + { + /// unlike tuple or array, here we do not want to make any of the variants nullable + /// so we do not call makeNullableRecursively + nested_types.push_back(nested_type); + } + + return std::make_shared(nested_types); + } + if (which.isTuple()) { const auto * tuple_type = assert_cast(type.get()); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql new file mode 100644 index 00000000000..ac544d04f6e --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -0,0 +1,4 @@ +SET input_format_json_infer_variant_from_multitype_array=1; +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 467366af990215e11b4b0309b90b3e6d9ebca5fd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 21:55:56 +0200 Subject: [PATCH 003/363] Fix unimplemented serialization error and update reference file --- src/Formats/SchemaInferenceUtils.cpp | 13 ++++++++--- .../03150_infer_type_variant.reference | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.reference diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index fc1a26f9b2f..d0d29892dec 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // push it back again + // make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } @@ -669,7 +669,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert numbers inferred from strings back to strings if needed. @@ -703,7 +702,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert JSON tuples with same nested types to arrays. 
@@ -1440,6 +1438,15 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F return; } + + if (const auto * variant_type = typeid_cast(data_type.get())) + { + auto nested_types = variant_type->getVariants(); + for (auto & nested_type : nested_types) + transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types); + data_type = std::make_shared(nested_types); + return; + } } void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference new file mode 100644 index 00000000000..ffb4209eadb --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -0,0 +1,22 @@ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ + │ │… a Int64))) │ + └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Variant(Int64, String) │ + ├───────┼────────────────────────┤ +2. │ Hello │ Variant(Int64, String) │ + └───────┴────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + ├─────────┼──────────────────────────────┤ +2. │ (42) │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + └─────────┴──────────────────────────────┘ From 4066c6bc548979703f45ba264437f5966c403d6a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 02:13:53 +0000 Subject: [PATCH 004/363] Update setting name --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 6 +++--- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0c0614550e5..ffc337b674f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index e027c693094..792ac08a5df 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; + format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 6cac41bd63e..d2c75872326 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,7 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multitype_array = false; + bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index d0d29892dec..f693916c584 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -308,7 +308,7 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multitype_array is true + /// if setting input_format_json_infer_variant_from_multi_type_array is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { @@ -666,7 +666,7 @@ namespace /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } @@ -699,7 +699,7 @@ namespace if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 2762cf86d2ba3f5c1ac86040b6ef484feb40837b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:19:17 +0200 Subject: [PATCH 005/363] fix test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..ac544d04f6e 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_json_infer_variant_from_multitype_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 4800aa6a6cdf5a8431fcc3d6fd96672590da0fff Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:21:52 +0200 Subject: [PATCH 006/363] rename setting in test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From fbf34519a5e72bb03c57ee6bc0feea1adddcb309 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 16:03:23 +0200 Subject: [PATCH 007/363] Add setting to SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..0665d1d6ca6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett 
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, + {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 777e0b313a48975fa51d645fb09a4f6ebfac1d1e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:51:29 +0200 Subject: [PATCH 008/363] Update 03150_infer_type_variant.sql Fix failing test --- tests/queries/0_stateless/03150_infer_type_variant.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..3253ddfe179 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ SET input_format_json_infer_variant_from_multi_type_array=1; -SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; From dd8d5c46c4678f570b9357dbdd912bcd6f4a267e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:55:34 +0200 Subject: [PATCH 009/363] Fix style --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index f693916c584..2cbb680af97 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // make the second type variant as well + /// make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } From 04800f596c4471d10e15c40a533c539c6b549b06 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 21:20:20 +0200 Subject: [PATCH 010/363] Incorporate review changes --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 60 +++++++++++++------ .../03150_infer_type_variant.reference | 29 +++++---- .../0_stateless/03150_infer_type_variant.sql | 3 +- 7 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ffc337b674f..be7564794e9 100644 --- a/src/Core/Settings.h 
+++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 0665d1d6ca6..75fcb538b2b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,7 +93,7 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 792ac08a5df..2854802453e 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; 
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; @@ -266,6 +265,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; + format_settings.try_infer_variant = settings.input_format_try_infer_variants; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 927a7e691d8..a2a9e75bd44 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -36,6 +36,7 @@ struct FormatSettings bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; bool is_writing_to_terminal = false; + bool try_infer_variant = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 25000; @@ -223,7 +224,6 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 2cbb680af97..298127cad68 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,6 +239,16 @@ namespace return true; } + bool checkIfTypesContainVariant(const DataTypes & types) + { + for (size_t i = 0; i < types.size(); ++i) + { + if (isVariant(types[i])) + return true; + } + return false; + } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -308,20 +318,31 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multi_type_array is true + /// if setting try_infer_variant is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { + auto typesAreEqual = checkIfTypesAreEqual(data_types); + auto typesContainVariant = checkIfTypesContainVariant(data_types); + if (typesAreEqual || typesContainVariant) + return; + + DataTypes new_data_types; + TypeIndexesSet new_type_indexes; + auto variant_type = std::make_shared(data_types); - /// replace separate types with a single variant type + size_t i = 0; + while (i != data_types.size()) + { + new_data_types.push_back(variant_type); + new_type_indexes.insert(TypeIndex::Variant); + i++; + } + data_types.clear(); type_indexes.clear(); - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); - - /// make the second type variant as well - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); + data_types = new_data_types; + type_indexes = new_type_indexes; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -661,16 +682,14 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -685,6 +704,10 @@ namespace if (settings.json.try_infer_objects_as_tuples) mergeJSONPaths(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + }; auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -696,14 +719,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -715,6 +736,9 @@ namespace if (json_info && json_info->allow_merging_named_tuples) mergeNamedTuples(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); }; transformTypesRecursively(types, transform_simple_types, transform_complex_types); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index ffb4209eadb..f3f53057845 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -2,7 +2,7 @@ ┃ arr ┃ toTypeName(arr) ┃ ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Int64))) │ + │ │… a Nullable(Int64)))) │ └──────────────────┴─────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ @@ -11,12 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - ├─────────┼──────────────────────────────┤ -2. │ (42) │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - └─────────┴──────────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + ├─────────┼────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + └─────────┴────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +3. 
│ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ + └────┴─────────────────┴──────────────┴──────────────────┘ diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 3253ddfe179..45126ccd471 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,5 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_try_infer_variants=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; +SELECT c1, toTypeName(c1), c2, toTypeName(c2) FROM format('CSV', '1,Hello World!\n2,"[1,2,3]"\n3,"2020-01-01"\n') FORMAT Pretty; \ No newline at end of file From 655262d1a1b21d85f4fbe284e0835065bcca379b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 5 Jun 2024 14:34:17 +0200 Subject: [PATCH 011/363] Fix issue with nullables --- src/Formats/SchemaInferenceUtils.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 298127cad68..43120cb7b22 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1547,11 +1547,13 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - /// unlike tuple or array, here we do not want to make any of the variants nullable - /// so we do not call makeNullableRecursively - nested_types.push_back(nested_type); + auto is_low_cardinality = nested_type->lowCardinality(); + auto has_sub_types = nested_type->haveSubtypes(); + if (!is_low_cardinality && has_sub_types) + nested_types.push_back(makeNullableRecursively(nested_type)); + else + nested_types.push_back(nested_type); } - return std::make_shared(nested_types); } From 418fc7f4438abd25eae4928f36ff0c3fef2395f8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 6 Jun 2024 10:02:19 +0200 Subject: [PATCH 012/363] Fix incorrect inference for other formats --- src/Formats/SchemaInferenceUtils.cpp | 29 +++++++++-- .../03150_infer_type_variant.reference | 48 +++++++++---------- 2 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 43120cb7b22..a8b5d4343f5 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -318,19 +318,40 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting try_infer_variant is true - /// and nested types are not equal then we convert to type variant. + /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { auto typesAreEqual = checkIfTypesAreEqual(data_types); auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual || typesContainVariant) + if (typesAreEqual) return; DataTypes new_data_types; TypeIndexesSet new_type_indexes; + std::shared_ptr variant_type; + + /// extract the nested types of variant and make a new variant with the nested types and the other type. + /// eg. Type 1: variant, Type 2: Date -> variant. 
+ if (typesContainVariant) + { + DataTypes extracted_types; + for (size_t i=0; i(data_types[i].get())) + extracted_types = variant->getVariants(); + } + else + extracted_types.push_back(data_types[i]); + } + variant_type = std::make_shared(extracted_types); + } + else + { + variant_type = std::make_shared(data_types); + } - auto variant_type = std::make_shared(data_types); size_t i = 0; while (i != data_types.size()) { diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index f3f53057845..a5f56cb3618 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,9 +1,9 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + a Nullable(Int64)))) │ + └──────────────────┴─────────────────────────────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -11,21 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - ├─────────┼────────────────────────────────────────┤ -2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - └─────────┴────────────────────────────────────────┘ - ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ - ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ - ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ -1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ - └────┴─────────────────┴──────────────┴──────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + ├─────────┼───────────────────────────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + └─────────┴───────────────────────────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +2. 
│ 2 │ Nullable(Int64) │ [1,2,3] │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Variant(Array(Nullable(Int64)), String) │ + └────┴─────────────────┴──────────────┴─────────────────────────────────────────┘ From 9cf11a210f07110676b373b864ea098583d87ff6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 11:11:06 +0200 Subject: [PATCH 013/363] Review changes --- src/Core/SettingsChangesHistory.h | 3 +- src/Formats/SchemaInferenceUtils.cpp | 73 ++++++------------- .../03150_infer_type_variant.reference | 24 +++--- 3 files changed, 35 insertions(+), 65 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3f743ef42bf..661ecc607ba 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,6 +92,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, @@ -103,8 +104,6 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index a8b5d4343f5..b7c71a95b29 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,16 +239,6 @@ namespace return true; } - bool checkIfTypesContainVariant(const DataTypes & types) - { - for (size_t i = 0; i < types.size(); ++i) - { - if (isVariant(types[i])) - return true; - } - return false; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -321,49 +311,28 @@ namespace /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { - auto typesAreEqual = checkIfTypesAreEqual(data_types); - auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual) + if (checkIfTypesAreEqual(data_types)) return; - DataTypes new_data_types; - TypeIndexesSet new_type_indexes; - std::shared_ptr variant_type; - - /// extract the nested types of variant and make a new variant with the nested types and the other type. - /// eg. Type 1: variant, Type 2: Date -> variant. 
- if (typesContainVariant) + DataTypes variant_types; + for (const auto & type : data_types) { - DataTypes extracted_types; - for (size_t i=0; i(type.get())) { - if (isVariant(data_types[i])) - { - if (const auto * variant = typeid_cast(data_types[i].get())) - extracted_types = variant->getVariants(); - } - else - extracted_types.push_back(data_types[i]); + const auto & current_variants = variant_type->getVariants(); + variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end()); + } + else + { + variant_types.push_back(type); } - variant_type = std::make_shared(extracted_types); - } - else - { - variant_type = std::make_shared(data_types); } - size_t i = 0; - while (i != data_types.size()) - { - new_data_types.push_back(variant_type); - new_type_indexes.insert(TypeIndex::Variant); - i++; - } + auto variant_type = std::make_shared(variant_types); - data_types.clear(); - type_indexes.clear(); - data_types = new_data_types; - type_indexes = new_type_indexes; + for (auto & type : data_types) + type = variant_type; + type_indexes = {TypeIndex::Variant}; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -703,11 +672,12 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Check settings specific for JSON formats. @@ -740,11 +710,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index a5f56cb3618..a43fa1e1227 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,16 +1,16 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple( a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────────────────────────────┘ - ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ 42 │ Variant(Int64, String) │ - ├───────┼────────────────────────┤ -2. │ Hello │ Variant(Int64, String) │ - └───────┴────────────────────────┘ + └────────────────────┴──────────────────────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Nullable(String) │ + ├───────┼──────────────────┤ +2. 
│ Hello │ Nullable(String) │ + └───────┴──────────────────┘ ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ From 548c90901020317669a10d191a0b6f8a7d0a0511 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 12:14:36 +0200 Subject: [PATCH 014/363] Add documentation --- docs/en/operations/settings/settings-formats.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6aae1ea62e5..8bbb469547b 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -194,6 +194,17 @@ If enabled, ClickHouse will try to infer type `DateTime64` from string fields in Enabled by default. +## input_format_try_infer_variants {#input_format_try_infer_variants} + +If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. From bad5e27bbffa9c1f6727a0416edcb135dadcc1fe Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:32:34 +0200 Subject: [PATCH 015/363] Update src/Formats/SchemaInferenceUtils.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Formats/SchemaInferenceUtils.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 0ac8b32f8aa..240830013c6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1539,9 +1539,7 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - auto is_low_cardinality = nested_type->lowCardinality(); - auto has_sub_types = nested_type->haveSubtypes(); - if (!is_low_cardinality && has_sub_types) + if (!nested_type->lowCardinality() && nested_type->haveSubtypes()) nested_types.push_back(makeNullableRecursively(nested_type)); else nested_types.push_back(nested_type); From e91dd71d4e55fe80d1c230a87eee7ad84333d9c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:06 +0200 Subject: [PATCH 016/363] Settings normalization --- src/Interpreters/InterpreterSetQuery.cpp | 26 ++++++++++-------------- src/Parsers/ParserQueryWithOutput.cpp | 11 ---------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 7e68fc5c4c1..cac44c7747b 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -9,6 +9,7 @@ #include #include + namespace DB { @@ -45,9 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. 
const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - { InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); - } } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -55,6 +54,16 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (!ast) return; + /// First apply the outermost settings. Then they could be overridden by deeper settings. + if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + + if (const auto * create_query = ast->as(); create_query && create_query->select) + applySettingsFromSelectWithUnion(create_query->select->as(), context_); + } + if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) @@ -71,19 +80,6 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } - else if (const auto * query_with_output = dynamic_cast(ast.get())) - { - if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); - - if (const auto * create_query = ast->as()) - { - if (create_query->select) - { - applySettingsFromSelectWithUnion(create_query->select->as(), context_); - } - } - } else if (auto * insert_query = ast->as()) { context_->setInsertFormat(insert_query->format); diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6d8a1258555..ac8f7d560e0 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -198,16 +197,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) return false; query_with_output.children.push_back(query_with_output.settings_ast); - - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) - { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); - } } else break; From f5da9e424075fa755edd1a869e199f4861011be2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:44 +0200 Subject: [PATCH 017/363] Add a test --- ...QueryWithOutputSettingsPushDownVisitor.cpp | 56 ------------------- .../QueryWithOutputSettingsPushDownVisitor.h | 39 ------------- .../03172_format_settings_clauses.reference | 14 +++++ .../03172_format_settings_clauses.sql | 30 ++++++++++ 4 files changed, 44 insertions(+), 95 deletions(-) delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.h create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.reference create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.sql diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp deleted file mode 100644 index 8cf0d0063ae..00000000000 --- 
a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) -{ - if (node->as()) - return true; - if (node->as()) - return true; - if (child->as()) - return true; - return false; -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data) -{ - if (auto * select_query = ast->as()) - visit(*select_query, ast, data); -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) -{ - ASTPtr select_settings_ast = select_query.settings(); - if (!select_settings_ast) - { - select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone()); - return; - } - - SettingsChanges & select_settings = select_settings_ast->as().changes; - SettingsChanges & settings = data.settings_ast->as().changes; - - for (auto & setting : settings) - { - auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting) - { - return select_setting.name == setting.name; - }); - if (it == select_settings.end()) - select_settings.push_back(setting); - else - it->value = setting.value; - } -} - -} diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h deleted file mode 100644 index fde8a07b555..00000000000 --- a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTSelectQuery; -struct SettingChange; -class SettingsChanges; - -/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query: -/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput) -/// -/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 -> -/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1 -/// -/// Otherwise settings after FORMAT will not be applied. 
-class QueryWithOutputSettingsPushDownMatcher -{ -public: - using Visitor = InDepthNodeVisitor; - - struct Data - { - const ASTPtr & settings_ast; - }; - - static bool needChildVisit(ASTPtr & node, const ASTPtr & child); - static void visit(ASTPtr & ast, Data & data); - -private: - static void visit(ASTSelectQuery &, ASTPtr &, Data &); -}; - -using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor; - -} diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.reference b/tests/queries/0_stateless/03172_format_settings_clauses.reference new file mode 100644 index 00000000000..8a98b137f4b --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.reference @@ -0,0 +1,14 @@ +1 +2 +1 +2 +1 +2 +1 +1 +3 +3 +3 +3 +3 +1 diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.sql b/tests/queries/0_stateless/03172_format_settings_clauses.sql new file mode 100644 index 00000000000..0d1aa4dcfbb --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.sql @@ -0,0 +1,30 @@ +SET max_block_size = 10, max_threads = 1; + +-- Take the following example: +SELECT 1 UNION ALL SELECT 2 FORMAT TSV; + +-- Each subquery can be put in parentheses and have its own settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV; + +-- And the whole query can have settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3; + +-- A single query with output is parsed in the same way as the UNION ALL chain: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3; + +-- So while these forms have a slightly different meaning, they both exist: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV; +SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3; + +-- And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. +-- But while this work: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3; + +-- This does not work automatically, unless we explicitly allow different orders: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV; + +-- Inevitably, we allow this: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV; +/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/ +-- Because this part is consumed into ASTSelectWithUnionQuery +-- and the rest into ASTQueryWithOutput. 
From 778807a8883901debc1bfeb72e17b37eb06bcaf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:22:36 +0200 Subject: [PATCH 018/363] Fix error; remove garbage --- src/Client/ClientBase.cpp | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 854cc3fef8b..ad4964b4b7c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -1937,41 +1938,13 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin global_context->setSettings(*old_settings); }); - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(global_context->getSettingsRef()); - global_context->applySettingsChanges(settings_ast.as()->changes); - global_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. - const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From 87a2e5f39018ef16ab6fdd79ad934dce6c45aaf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:30:55 +0200 Subject: [PATCH 019/363] Fix error --- src/Client/ClientBase.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ad4964b4b7c..958a1f50813 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1932,10 +1932,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { /// Temporarily apply query settings to context. 
- std::optional old_settings; + Settings old_settings = global_context->getSettings(); SCOPE_EXIT_SAFE({ - if (old_settings) - global_context->setSettings(*old_settings); + global_context->setSettings(old_settings); }); InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); From 67a539292e5e62f0bf470d11a5b224ef105bea02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:33:11 +0200 Subject: [PATCH 020/363] Update test --- tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference | 4 ++-- tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference index 22405bf1866..a8b99666654 100644 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference @@ -1,7 +1,7 @@ 1 1 1 -1 -1 +2 +1 2 2 diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh index b70c28422c9..173cc949500 100755 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1' # push down append ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1' -# overwrite on push down (since these settings go latest) +# do not overwrite on push down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1' # on push-down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV' From 4817657375a37c374eb4be72793cba434c16a815 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 17 Jun 2024 23:43:03 +0200 Subject: [PATCH 021/363] Revert "Revert "Fix AWS ECS"" --- contrib/aws-crt-cpp | 2 +- .../ProxyConfigurationResolverProvider.cpp | 35 +++++++------------ .../ProxyConfigurationResolverProvider.h | 5 ++- src/IO/ReadWriteBufferFromHTTP.cpp | 2 +- src/IO/S3/Client.cpp | 6 ++-- src/IO/S3/PocoHTTPClient.cpp | 20 +++++++++-- src/IO/S3/PocoHTTPClient.h | 14 ++++---- src/IO/S3/PocoHTTPClientFactory.cpp | 5 ++- .../0_stateless/03170_ecs_crash.reference | 4 +++ tests/queries/0_stateless/03170_ecs_crash.sh | 9 +++++ 10 files changed, 61 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/03170_ecs_crash.reference create mode 100755 tests/queries/0_stateless/03170_ecs_crash.sh diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..0217761556a 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit 0217761556a7ba7ec537fe933d0ab1159096746e diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index b06073121e7..a46837bfdb9 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -112,9 +112,8 @@ namespace return configuration.has(config_prefix + ".uri"); } -
/* - * New syntax requires protocol prefix " or " - * */ + /* New syntax requires protocol prefix " or " + */ std::optional getProtocolPrefix( ProxyConfiguration::Protocol request_protocol, const String & config_prefix, @@ -130,22 +129,18 @@ namespace return protocol_prefix; } - template std::optional calculatePrefixBasedOnSettingsSyntax( + bool new_syntax, ProxyConfiguration::Protocol request_protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration ) { if (!configuration.has(config_prefix)) - { return std::nullopt; - } - if constexpr (new_syntax) - { + if (new_syntax) return getProtocolPrefix(request_protocol, config_prefix, configuration); - } return config_prefix; } @@ -155,24 +150,21 @@ std::shared_ptr ProxyConfigurationResolverProvider:: Protocol request_protocol, const Poco::Util::AbstractConfiguration & configuration) { - if (auto resolver = getFromSettings(request_protocol, "proxy", configuration)) - { + if (auto resolver = getFromSettings(true, request_protocol, "proxy", configuration)) return resolver; - } return std::make_shared( request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } -template std::shared_ptr ProxyConfigurationResolverProvider::getFromSettings( + bool new_syntax, Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(request_protocol, config_prefix, configuration); + auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(new_syntax, request_protocol, config_prefix, configuration); if (!prefix_opt) { @@ -195,20 +187,17 @@ std::shared_ptr ProxyConfigurationResolverProvider:: std::shared_ptr ProxyConfigurationResolverProvider::getFromOldSettingsFormat( Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - /* - * First try to get it from settings only using the combination of config_prefix and configuration. + /* First try to get it from settings only using the combination of config_prefix and configuration. * This logic exists for backward compatibility with old S3 storage specific proxy configuration. * */ - if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(request_protocol, config_prefix + ".proxy", configuration)) + if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(false, request_protocol, config_prefix + ".proxy", configuration)) { return resolver; } - /* - * In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. + /* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. * Falls back to Environment resolver if no configuration is found. 
* */ return ProxyConfigurationResolverProvider::get(request_protocol, configuration); diff --git a/src/Common/ProxyConfigurationResolverProvider.h b/src/Common/ProxyConfigurationResolverProvider.h index ebf22f7e92a..357b218e499 100644 --- a/src/Common/ProxyConfigurationResolverProvider.h +++ b/src/Common/ProxyConfigurationResolverProvider.h @@ -33,12 +33,11 @@ public: ); private: - template static std::shared_ptr getFromSettings( + bool is_new_syntax, Protocol protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration - ); + const Poco::Util::AbstractConfiguration & configuration); }; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..4f883a9b4ed 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -221,7 +221,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( if (iter == http_header_entries.end()) { - http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}{}", VERSION_STRING, VERSION_OFFICIAL)); } if (!delay_initialization && use_external_buffer) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 9229342b8c1..cbb61deea9f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -972,10 +972,10 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT { auto context = Context::getGlobalContextInstance(); chassert(context); - auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); + auto proxy_configuration_resolver = ProxyConfigurationResolverProvider::get(ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); - auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); }; - auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; + auto per_request_configuration = [=]{ return proxy_configuration_resolver->resolve(); }; + auto error_report = [=](const ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; auto config = PocoHTTPClientConfiguration( per_request_configuration, diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1cef43530e0..04982f14f36 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,4 +1,5 @@ #include +#include #include "config.h" #if USE_AWS_S3 @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include + static const int SUCCESS_RESPONSE_MIN = 200; static const int SUCCESS_RESPONSE_MAX = 299; @@ -84,7 +87,7 @@ namespace DB::S3 { PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -94,7 +97,7 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_) + std::function error_report_) : per_request_configuration(per_request_configuration_) , force_region(force_region_) , remote_host_filter(remote_host_filter_) @@ -107,6 +110,8 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( , 
s3_use_adaptive_timeouts(s3_use_adaptive_timeouts_) , error_report(error_report_) { + /// This is used to identify configurations created by us. + userAgent = std::string(VERSION_FULL) + VERSION_OFFICIAL; } void PocoHTTPClientConfiguration::updateSchemeAndRegion() @@ -166,6 +171,17 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config { } +PocoHTTPClient::PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration) + : timeouts(ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) + .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withTCPKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0))), + remote_host_filter(Context::getGlobalContextInstance()->getRemoteHostFilter()) +{ +} + std::shared_ptr PocoHTTPClient::MakeRequest( const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 88251b964e2..18a21649167 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -38,7 +38,7 @@ class PocoHTTPClient; struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { - std::function per_request_configuration; + std::function per_request_configuration; String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; @@ -54,13 +54,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; - std::function error_report; + std::function error_report; void updateSchemeAndRegion(); private: PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -70,8 +70,7 @@ private: bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_ - ); + std::function error_report_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. 
friend ClientFactory; @@ -120,6 +119,7 @@ class PocoHTTPClient : public Aws::Http::HttpClient { public: explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); + explicit PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -166,8 +166,8 @@ protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; - std::function per_request_configuration; - std::function error_report; + std::function per_request_configuration; + std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index ef7af2d01ba..abec907778c 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -15,7 +15,10 @@ namespace DB::S3 std::shared_ptr PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const { - return std::make_shared(static_cast(client_configuration)); + if (client_configuration.userAgent.starts_with("ClickHouse")) + return std::make_shared(static_cast(client_configuration)); + else /// This client is created inside the AWS SDK with default settings to obtain ECS credentials from localhost. + return std::make_shared(client_configuration); } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( diff --git a/tests/queries/0_stateless/03170_ecs_crash.reference b/tests/queries/0_stateless/03170_ecs_crash.reference new file mode 100644 index 00000000000..acd7c60768b --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.reference @@ -0,0 +1,4 @@ +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/03170_ecs_crash.sh b/tests/queries/0_stateless/03170_ecs_crash.sh new file mode 100755 index 00000000000..fa6870c4cf2 --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Previous versions crashed in attempt to use this authentication method (regardless of whether it was able to authenticate): +AWS_CONTAINER_CREDENTIALS_FULL_URI=http://localhost:1338/latest/meta-data/container/security-credentials $CLICKHOUSE_LOCAL -q "select * from s3('http://localhost:11111/test/a.tsv')" From 1df895f3dadcbb65d246927b79f42144e5fc1af2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 14:04:40 +0200 Subject: [PATCH 022/363] Minor changes --- src/Access/SettingsConstraints.cpp | 8 ++++---- src/Interpreters/InterpreterSetQuery.cpp | 10 +++++----- src/Interpreters/InterpreterSetQuery.h | 2 +- .../03003_compatibility_setting_bad_value.sql | 1 - 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index a274f6b54f2..7506e365035 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -219,8 +219,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang }); } -template -bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) +template +bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) { Field current_value; bool has_current_value = current_settings.tryGet(change.name, current_value); @@ -230,12 +230,12 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel return false; if (throw_on_failure) - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); else { try { - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); } catch (...) { diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index cac44c7747b..15d4ba56d8d 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. 
const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false); if (const auto * create_query = ast->as(); create_query && create_query->select) applySettingsFromSelectWithUnion(create_query->select->as(), context_); @@ -67,7 +67,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } @@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false); } } diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index 2438762f347..f50105c39f4 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). * It is used for interpretation of SETTINGS clause in SELECT query. 
*/ - void executeForCurrentContext(bool ignore_setting_constraints = false); + void executeForCurrentContext(bool ignore_setting_constraints); bool supportsTransactions() const override { return true; } diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 48e98798c51..b9fbfd917fc 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1 @@ select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} - From 11d54f4809a8b58773f13664e6b842cb6c7dce48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 15:18:53 +0200 Subject: [PATCH 023/363] Pass-through settings from the client --- programs/client/Client.cpp | 3 +++ programs/server/Server.cpp | 2 +- src/Access/AccessControl.cpp | 8 +++++++- src/Access/AccessControl.h | 5 ++++- src/Client/ClientBase.cpp | 1 - .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..22a035fbd71 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1184,6 +1184,9 @@ void Client::processConfig() global_context->setQueryKindInitial(); global_context->setQuotaClientKey(config().getString("quota_key", "")); global_context->setQueryKind(query_kind); + + /// Allow to pass-through unknown settings to the server. + global_context->getAccessControl().allowAllSettings(); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6414f7f6ea5..2f1d07790e1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1824,7 +1824,7 @@ try auto & access_control = global_context->getAccessControl(); try { - access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..9831621d6ac 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -264,7 +264,7 @@ AccessControl::AccessControl() AccessControl::~AccessControl() = default; -void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, +void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) { if (config_.has("custom_settings_prefixes")) @@ -852,4 +852,10 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const return *external_authenticators; } + +void AccessControl::allowAllSettings() +{ + custom_settings_prefixes->registerPrefixes({""}); +} + } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..f408f6dfb0d 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -54,7 +54,7 @@ public: ~AccessControl() override; /// Initializes access storage (user directories). 
- void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, + void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); /// Parses access entities from a configuration loaded from users.xml. @@ -235,6 +235,9 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); + /// Allow all setting names - this can be used in clients to pass-through unknown settings to the server. + void allowAllSettings(); + private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 958a1f50813..617a56cfd95 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2958,7 +2958,6 @@ void ClientBase::init(int argc, char ** argv) boost::replace_all(arg, "−", "--"); } - OptionsDescription options_description; options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width)); diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index b9fbfd917fc..3a09eec7452 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1 +1 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} +select 42 settings compatibility=NULL; -- {clientError BAD_GET} From 16c3e36b5a2f3203eb87161f4320ea5e70865fc4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jun 2024 22:50:52 +0200 Subject: [PATCH 024/363] Simplification --- src/Client/ClientBase.cpp | 142 +++++++++++++++++------------------ src/Client/ClientBase.h | 1 + src/Interpreters/Session.cpp | 8 +- src/Interpreters/Session.h | 3 +- 4 files changed, 74 insertions(+), 80 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 617a56cfd95..490a560de2d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -439,7 +439,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query) std::vector data; for (auto & table : external_tables) - data.emplace_back(table.getData(global_context)); + data.emplace_back(table.getData(query_context)); connection->sendExternalTablesData(data); } @@ -652,10 +652,10 @@ try /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. if (!extras_into_stdout || select_only_into_file) - output_format = global_context->getOutputFormatParallelIfPossible( + output_format = query_context->getOutputFormatParallelIfPossible( current_format, out_file_buf ? *out_file_buf : *out_buf, block); else - output_format = global_context->getOutputFormat( + output_format = query_context->getOutputFormat( current_format, out_file_buf ? *out_file_buf : *out_buf, block); output_format->setAutoFlush(); @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, global_context)) + if (insert && isSyncInsertWithData(*insert, query_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1067,7 +1067,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } } - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; int retries_left = 10; @@ -1082,10 +1082,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1275,7 +1275,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - global_context->setSetting("session_timezone", tz); + query_context->setSetting("session_timezone", tz); } @@ -1471,13 +1471,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query) { - if (!global_context->hasInsertionTable() && insert_query.table) + if (!query_context->hasInsertionTable() && insert_query.table) { String table = insert_query.table->as().shortName(); if (!table.empty()) { String database = insert_query.database ? insert_query.database->as().shortName() : ""; - global_context->setInsertionTable(StorageID(database, table)); + query_context->setInsertionTable(StorageID(database, table)); } } } @@ -1528,7 +1528,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars const auto & parsed_insert_query = parsed_query->as(); if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else @@ -1542,10 +1542,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1593,7 +1593,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Set callback to be called on file progress. 
if (tty_buf) - progress_indication.setFileProgressCallback(global_context, *tty_buf); + progress_indication.setFileProgressCallback(query_context, *tty_buf); } /// If data fetched from file (maybe compressed file) @@ -1627,10 +1627,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des } StorageFile::CommonArguments args{ - WithContext(global_context), + WithContext(query_context), parsed_insert_query->table_id, current_format, - getFormatSettings(global_context), + getFormatSettings(query_context), compression_method, columns_for_storage_file, ConstraintsDescription{}, @@ -1638,7 +1638,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des {}, String{}, }; - StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args); + StoragePtr storage = std::make_shared(in_file, query_context->getUserFilesPath(), args); storage->startup(); SelectQueryInfo query_info; @@ -1647,18 +1647,18 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des auto metadata = storage->getInMemoryMetadataPtr(); QueryPlan plan; storage->read( - plan, - sample.getNames(), - storage->getStorageSnapshot(metadata, global_context), - query_info, - global_context, - {}, - global_context->getSettingsRef().max_block_size, - getNumberOfPhysicalCPUCores()); + plan, + sample.getNames(), + storage->getStorageSnapshot(metadata, query_context), + query_info, + query_context, + {}, + query_context->getSettingsRef().max_block_size, + getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(global_context), - BuildQueryPipelineSettings::fromContext(global_context)); + QueryPlanOptimizationSettings::fromContext(query_context), + BuildQueryPipelineSettings::fromContext(query_context)); QueryPlanResourceHolder resources; auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); @@ -1719,14 +1719,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes current_format = insert->format; } - auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); + auto source = query_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); Pipe pipe(source); if (columns_description.hasDefaults()) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *source, global_context); + return std::make_shared(header, columns_description, *source, query_context); }); } @@ -1872,6 +1872,9 @@ void ClientBase::cancelQuery() void ClientBase::processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_, bool report_error) { + query_context = Context::createCopy(global_context); + query_context->makeQueryContext(); + resetOutput(); have_error = false; cancelled = false; @@ -1888,12 +1891,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - global_context->setCurrentQueryId(""); + query_context->setCurrentQueryId(""); // Generate a new query_id for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", query_context->getCurrentQueryId())), 
std_out); writeChar('\n', std_out); std_out.next(); } @@ -1920,7 +1923,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin auto password = auth_data->getPassword(); if (password) - global_context->getAccessControl().checkPasswordComplexityRules(*password); + query_context->getAccessControl().checkPasswordComplexityRules(*password); } } } @@ -1930,47 +1933,40 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); + /// Apply query settings to context, as they can affect the behavior on client-side. + InterpreterSetQuery::applySettingsFromQuery(parsed_query, query_context); + + if (!connection->checkConnected(connection_parameters.timeouts)) + connect(); + + ASTPtr input_function; + const auto * insert = parsed_query->as(); + if (insert && insert->select) + insert->tryFindInputFunction(input_function); + + bool is_async_insert_with_inlined_data = query_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + + if (is_async_insert_with_inlined_data) { - /// Temporarily apply query settings to context. - Settings old_settings = global_context->getSettings(); - SCOPE_EXIT_SAFE({ - global_context->setSettings(old_settings); - }); + bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); + bool have_external_data = have_data_in_stdin || insert->infile; - InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); - - if (!connection->checkConnected(connection_parameters.timeouts)) - connect(); - - ASTPtr input_function; - const auto * insert = parsed_query->as(); - if (insert && insert->select) - insert->tryFindInputFunction(input_function); - - bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); - - if (is_async_insert_with_inlined_data) - { - bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); - bool have_external_data = have_data_in_stdin || insert->infile; - - if (have_external_data) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); - } - - /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. - if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) - { - if (input_function && insert->format.empty()) - throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); - - processInsertQuery(query_to_execute, parsed_query); - } - else - processOrdinaryQuery(query_to_execute, parsed_query); + if (have_external_data) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); } + /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) + { + if (input_function && insert->format.empty()) + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); + + processInsertQuery(query_to_execute, parsed_query); + } + else + processOrdinaryQuery(query_to_execute, parsed_query); + /// Do not change context (current DB, settings) in case of an exception. 
if (!have_error) { @@ -2651,10 +2647,8 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!has_log_comment) { - Settings settings = global_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" - settings.log_comment = fs::absolute(fs::path(file_name)); - global_context->setSettings(settings); + global_context->setSetting("log_comment", String(fs::absolute(fs::path(file_name)))); } return executeMultiQuery(queries_from_file); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 220fcddc038..228a9d65ea7 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -198,6 +198,7 @@ protected: /// since other members can use them. SharedContextHolder shared_context; ContextMutablePtr global_context; + ContextMutablePtr query_context; bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..9dd686290db 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; From fa5d4cfea183c64e3ff088f922c0960a3c3951e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Jun 2024 00:41:14 +0200 Subject: [PATCH 025/363] Fix error --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 490a560de2d..8fcb9632be0 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, query_context)) + if (insert && isSyncInsertWithData(*insert, global_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; From e064171a68fcac110d27ff36a51b7a6bb4fbb251 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 03:58:11 +0200 Subject: [PATCH 026/363] Fix errors --- programs/client/Client.cpp | 10 ---------- programs/local/LocalServer.cpp | 3 --- src/Client/ClientBase.cpp | 4 +++- src/Client/LocalConnection.cpp | 1 - src/Client/LocalConnection.h | 2 -- .../0_stateless/00857_global_joinsavel_table_alias.sql | 1 - 6 files changed, 3 insertions(+), 18 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 22a035fbd71..ab02d9fac74 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -311,7 +310,6 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; - auto & thread_status = MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -326,14 +324,6 @@ try initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); - { - // All that just to set DB::CurrentThread::get().getGlobalContext() - // which is required for client timezone (pushed from server) to work. - auto thread_group = std::make_shared(); - const_cast(thread_group->global_context) = global_context; - thread_status.attachToGroup(thread_group, false); - } - /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb1c35743b2..e5f4bac852c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -27,10 +27,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -48,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 38aa0ed8b14..03f088f2b61 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1872,7 +1872,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin ASTPtr parsed_query, std::optional echo_query_, bool report_error) { query_context = Context::createCopy(global_context); - query_context->makeQueryContext(); + CurrentThread::QueryScope query_scope(query_context); resetOutput(); have_error = false; @@ -2926,6 +2926,8 @@ void ClientBase::init(int argc, char ** argv) /// Don't parse options with Poco library, we prefer neat boost::program_options. 
stopOptionsProcessing(); + MainThreadStatus::getInstance(); + stdin_is_a_tty = isatty(STDIN_FILENO); stdout_is_a_tty = isatty(STDOUT_FILENO); stderr_is_a_tty = isatty(STDERR_FILENO); diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index c7494e31605..e63e5793505 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -125,7 +125,6 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; - state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 899d134cce5..bdd0b481529 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -61,8 +61,6 @@ struct LocalQueryState /// Time after the last check to stop the request and send the progress. Stopwatch after_send_progress; Stopwatch after_send_profile_events; - - std::unique_ptr query_scope_holder; }; diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index 2044a9b8d22..092b071cb48 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; From 84f81c61853f34d765475309932d73af55e25d0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 17:18:32 +0200 Subject: [PATCH 027/363] Fix error --- src/Client/Suggest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 0188ebc8173..c1f163939e8 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -96,6 +96,10 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { ThreadStatus thread_status; + my_context->makeQueryContext(); + auto group = ThreadGroup::createForQuery(my_context); + CurrentThread::attachToGroup(group); + for (size_t retry = 0; retry < 10; ++retry) { try From 602fa5cbadba2924bc4e57f26f5f37b00d7b086e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 17:28:54 +0200 Subject: [PATCH 028/363] Fix error --- programs/local/LocalServer.cpp | 1 - src/Client/ClientBase.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index e5f4bac852c..45641b999b6 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -428,7 +428,6 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; - thread_status.emplace(); StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 228a9d65ea7..83b99696373 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -244,9 +244,6 @@ protected: Settings cmd_settings; MergeTreeSettings cmd_merge_tree_settings; - /// thread status should be destructed before shared context because it relies on process list. 
- std::optional thread_status; - ServerConnectionPtr connection; ConnectionParameters connection_parameters; From e0ef26285a5cda87e5073c6a0aaecb67f9609a96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 19:46:00 +0200 Subject: [PATCH 029/363] Update submodule --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index 1c2946bfcb7..6463c9cbf47 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf +Subproject commit 6463c9cbf47cab78e4a4fa97a866942f201c6a58 From 9948150b87c6ee4531e0130de095035e2f228ec1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jun 2024 00:19:20 +0200 Subject: [PATCH 030/363] Fix error --- src/IO/S3/PocoHTTPClient.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 18a21649167..3b7ec4d1d9c 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -172,7 +172,7 @@ protected: const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; bool s3_use_adaptive_timeouts = true; - bool enable_s3_requests_logging; + bool enable_s3_requests_logging = false; bool for_disk_s3; /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler From 384aa9feb90bbf95c5bc0e5498af4aca769c2531 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:51:20 +0200 Subject: [PATCH 031/363] Move setting to 24.7 changes --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 1ab7dc69f60..deaeba2a7de 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, + }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -97,7 +99,6 @@ static const std::map Date: Tue, 25 Jun 2024 02:49:40 +0200 Subject: [PATCH 032/363] Update submodule --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index 1c2946bfcb7..d5450d76abd 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf +Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59 From 49634db3ba2961dadbdc1689f0a4ef1ecdb8bea1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 02:51:57 +0200 Subject: [PATCH 033/363] Update submodule --- contrib/aws-crt-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..e5aa45cacfd 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject 
commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit e5aa45cacfdcda7719ead38760e7c61076f5745f From d7b3c3e8a97835c9f7987a5852ef9469770f8560 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2024 21:59:04 +0200 Subject: [PATCH 034/363] Add review suggestion --- src/Interpreters/InterpreterSetQuery.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 15d4ba56d8d..2ae35c4313b 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false); + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); if (const auto * create_query = ast->as(); create_query && create_query->select) applySettingsFromSelectWithUnion(create_query->select->as(), context_); @@ -67,7 +67,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } @@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } } From b14823d07ee1c059d2f278c0cc4068474c79ad2a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:33:57 +0200 Subject: [PATCH 035/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 7719fe1e837..828031f4c23 100644 --- 
a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -60,6 +60,7 @@ static std::initializer_list
Date: Mon, 22 Jul 2024 05:46:53 +0200
Subject: [PATCH 036/363] What if we tighten limits for functional tests?

---
 tests/config/install.sh | 1 +
 tests/config/users.d/limits.xml | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/config/users.d/limits.xml

diff --git a/tests/config/install.sh b/tests/config/install.sh
index 1b0edc5fc16..265b9248f4a 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -93,6 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/
+ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/
 if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then
     ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/
diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml
new file mode 100644
index 00000000000..f44c73241ab
--- /dev/null
+++ b/tests/config/users.d/limits.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <max_memory_usage>5G</max_memory_usage>
+            <max_rows_to_read>20000000</max_rows_to_read>
+        </default>
+    </profiles>
+</clickhouse>

From 8c264230e30bf97f6bac999401cd594b31e09977 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 22 Jul 2024 07:20:33 +0200
Subject: [PATCH 037/363] Loosen the limit

---
 docker/test/stateful/run.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index 857385f4715..72a8f31ab71 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -118,8 +118,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
     clickhouse-client --query "CREATE TABLE test.hits AS datasets.hits_v1"
     clickhouse-client --query "CREATE TABLE test.visits AS datasets.visits_v1"
-    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
-    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
     clickhouse-client --query "DROP TABLE datasets.hits_v1"
     clickhouse-client --query "DROP TABLE datasets.visits_v1"
@@ -191,16 +191,16 @@ else
     ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
-    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
     clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
     clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
 else
     clickhouse-client --query "RENAME TABLE datasets.hits_v1
TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi - clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, 
Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" From 4978869d2f709a9ad93cefc04cda43bcc739fb22 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 18:04:38 +0800 Subject: [PATCH 038/363] stash --- src/Functions/FunctionOverlay.cpp | 481 ++++++++++++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100644 src/Functions/FunctionOverlay.cpp diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp new file mode 100644 index 00000000000..6160335ad79 --- /dev/null +++ b/src/Functions/FunctionOverlay.cpp @@ -0,0 +1,481 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include 
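/// (Illustrative aside, not part of the patch: expected behaviour of the function
/// added below, derived from its prefix/replace/suffix copying logic. The offset
/// is 1-based, and when `length` is omitted it defaults to the length of the
/// replacement, so the replaced region and the replacement coincide.)
///
///     SELECT overlay('Txxxxas', 'hom', 2, 4);  -- 'Thomas'
///     SELECT overlay('Hello', 'World', 2);     -- 'HWorld'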
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int ILLEGAL_COLUMN;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
+}
+
+namespace
+{
+
+/// If 'is_utf8' - measure offset and length in code points instead of bytes.
+/// Syntax: overlay(input, replace, offset[, length])
+template <bool is_utf8>
+class FunctionOverlay : public IFunction
+{
+public:
+    static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay";
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionOverlay>(); }
+    String getName() const override { return name; }
+    bool isVariadic() const override { return true; }
+    size_t getNumberOfArguments() const override { return 0; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const size_t number_of_arguments = arguments.size();
+        if (number_of_arguments < 3 || number_of_arguments > 4)
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: "
+                "passed {}, should be 3 or 4",
+                getName(),
+                number_of_arguments);
+
+        /// first argument is string
+        if (!isString(arguments[0]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of first argument of function {}, expected String",
+                arguments[0]->getName(),
+                getName());
+
+        /// second argument is string
+        if (!isString(arguments[1]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of second argument of function {}, expected String",
+                arguments[1]->getName(),
+                getName());
+
+        if (!isNativeNumber(arguments[2]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64",
+                arguments[2]->getName(),
+                getName());
+
+        if (number_of_arguments == 4 && !isNativeNumber(arguments[3]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64",
+                arguments[3]->getName(),
+                getName());
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        const size_t number_of_arguments = arguments.size();
+
+        ColumnPtr column_string = arguments[0].column;
+        ColumnPtr column_offset = arguments[1].column;
+        ColumnPtr column_length;
+        if (number_of_arguments == 3)
+            column_length = arguments[2].column;
+
+        const ColumnConst * column_offset_const = checkAndGetColumn<ColumnConst>(column_offset.get());
+        const ColumnConst * column_length_const = nullptr;
+        if (number_of_arguments == 3)
+            column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
+
+        Int64 offset = 0;
+        Int64 length = 0;
+        if (column_offset_const)
+            offset = column_offset_const->getInt(0);
+        if (column_length_const)
+            length = column_length_const->getInt(0);
+
+        auto res_col = ColumnString::create();
+        auto & res_data = res_col->getChars();
+        auto & res_offsets = res_col->getOffsets();
+    }
+
+private:
+    template <bool three_args, bool offset_is_const, bool length_is_const>
+    void constantConstant(
+        size_t rows,
+        const StringRef & input,
+        const StringRef & replace,
+        const ColumnPtr & column_offset,
+        const ColumnPtr & column_length,
+        Int64 const_offset,
+        Int64 const_length,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets
& res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantConstant(input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t res_offset = 0; + size_t input_size = input.size; + for (size_t i = 0; i < rows; ++i) + { + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorConstant( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorConstant(input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? 
(offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void constantVector( + const StringRef & input, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantVector(input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = replace_offsets.size(); + size_t input_size = input.size; + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? 
length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorVector( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorVector( + input_data, + input_offsets, + replace_data, + replace_offsets, + column_offset, + column_length, + const_offset, + -1, + res_data, + res_offsets); + return; + } + + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } +}; + +} + +REGISTER_FUNCTION(Overlay) +{ + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::CaseSensitive); +} + +} From 81688e0efdf75a6a3923d6b95f09579d37e93e2a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 20:29:35 +0800 Subject: [PATCH 039/363] almost finish --- src/Functions/FunctionOverlay.cpp | 154 ++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 20 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 6160335ad79..65af4d811f5 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -15,10 +15,8 @@ namespace DB namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int ZERO_ARRAY_OR_TUPLE_INDEX; } namespace @@ -86,32 +84,145 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const size_t number_of_arguments = arguments.size(); + bool three_args = number_of_arguments == 3; - ColumnPtr column_string = arguments[0].column; - ColumnPtr column_offset = arguments[1].column; + ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (number_of_arguments == 3) - column_length = arguments[2].column; + if (!three_args) + column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (number_of_arguments == 3) + if (!three_args) column_length_const = checkAndGetColumn(column_length.get()); - Int64 offset = 0; - Int64 length = 0; + bool offset_is_const = false; + bool length_is_const = false; + Int64 offset = -1; + Int64 length = -1; if (column_offset_const) + { offset = column_offset_const->getInt(0); + offset_is_const = true; + } + if (column_length_const) + { length = column_length_const->getInt(0); + length_is_const = true; + } + auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); + + ColumnPtr column_input = arguments[0].column; + ColumnPtr column_replace = arguments[1].column; + + const auto * column_input_const = checkAndGetColumn(column_input.get()); + const auto * column_input_string = checkAndGetColumn(column_input.get()); + if (column_input_const) + { + StringRef input = column_input_const->getDataAt(0); + res_data.reserve(input.size * 
input_rows_count); + } + else + { + res_data.reserve(column_input_string->getChars().size()); + } + + const auto * column_replace_const = checkAndGetColumn(column_replace.get()); + const auto * column_replace_string = checkAndGetColumn(column_replace.get()); + bool input_is_const = column_input_const != nullptr; + bool replace_is_const = column_replace_const != nullptr; + +#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ + if (input_is_const && replace_is_const) \ + constantConstant( \ + input_rows_count, \ + column_input_const->getDataAt(0), \ + column_replace_const->getDataAt(0), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (input_is_const) \ + constantVector( \ + column_input_const->getDataAt(0), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (replace_is_const) \ + vectorConstant( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_const->getDataAt(0), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else \ + vectorVector( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); + + if (three_args) + { + if (offset_is_const) + { + OVERLAY_EXECUTE_CASE(true, true, false) + } + else + { + OVERLAY_EXECUTE_CASE(true, false, false) + } + } + else + { + if (offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, true) + } + else if (offset_is_const && !length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, false) + } + else if (!offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, false, true) + } + else + { + OVERLAY_EXECUTE_CASE(false, false, false) + } + } +#undef OVERLAY_EXECUTE_CASE + + return res_col; } + private: -template + template void constantConstant( size_t rows, const StringRef & input, @@ -121,11 +232,12 @@ template ( + rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -194,7 +306,7 @@ template + template void vectorConstant( const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, @@ -204,11 +316,12 @@ template ( + input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -281,7 +394,7 @@ template + template void constantVector( const StringRef & input, const ColumnString::Chars & replace_data, @@ -291,11 +404,12 @@ template ( + input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -379,11 +493,11 @@ template ( + vectorVector( input_data, input_offsets, replace_data, From 9785f85ca32e3af5760a9bc24e998e7d050fc073 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 21:08:02 +0800 Subject: [PATCH 040/363] fix style --- src/Functions/FunctionOverlay.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 65af4d811f5..7d0e2e86de2 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -1,14 +1,10 @@ #include -#include #include -#include -#include #include #include 
#include #include -#include - +#include namespace DB { From f4138ee6c67bbdb82269a9087b1b054f33cb35a8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 10:20:27 +0800 Subject: [PATCH 041/363] fix bugs about corner cases --- src/Functions/FunctionOverlay.cpp | 89 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 7d0e2e86de2..d3ee7e1df6d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -218,6 +218,26 @@ public: private: + /// input offset is 1-based, maybe negative + /// output result is 0-based valid offset, within [0, input_size] + static size_t getValidOffset(Int64 offset, size_t input_size) + { + if (offset > 0) + { + if (static_cast(offset) > input_size + 1) [[unlikely]] + return input_size; + else + return offset - 1; + } + else + { + if (input_size < -static_cast(offset)) [[unlikely]] + return 0; + else + return input_size + offset; + } + } + template void constantConstant( size_t rows, @@ -237,13 +257,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); size_t replace_size = replace.size; Int64 length = 0; // maybe negative @@ -258,14 +275,14 @@ private: valid_length = replace_size; } + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; - size_t input_size = input.size; for (size_t i = 0; i < rows; ++i) { if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -274,7 +291,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -321,14 +338,6 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - size_t replace_size = replace.size; Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -343,16 +352,22 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -361,7 +376,7 @@ private: valid_length = length >= 0 ? 
length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -409,13 +424,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -426,7 +438,7 @@ private: } size_t rows = replace_offsets.size(); - size_t input_size = input.size; + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -436,12 +448,11 @@ private: if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -450,7 +461,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -507,15 +518,6 @@ private: return; } - - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -525,6 +527,8 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -533,15 +537,18 @@ private: size_t replace_offset = replace_offsets[i - 1]; size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -550,7 +557,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); From b42069cfa80e66ab59669bb7ccb93c2944d91170 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:27:11 +0200 Subject: [PATCH 042/363] Adjust some tests --- .../0_stateless/00111_shard_external_sort_distributed.sql | 6 +++--- .../0_stateless/00463_long_sessions_in_http_interface.sh | 2 +- tests/queries/0_stateless/00601_kill_running_query.sh | 2 +- tests/queries/0_stateless/00976_max_execution_speed.sql | 2 +- .../0_stateless/01119_optimize_trivial_insert_select.sql | 5 +++-- .../queries/0_stateless/01245_limit_infinite_sources.sql | 1 + tests/queries/0_stateless/01249_flush_interactive.sh | 4 ++-- tests/queries/0_stateless/01293_show_settings.reference | 1 + .../01301_aggregate_state_exception_memory_leak.sh | 2 +- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +- .../0_stateless/02021_exponential_sum_shard.reference | 1 - tests/queries/0_stateless/02021_exponential_sum_shard.sql | 1 - tests/queries/0_stateless/02136_kill_scalar_queries.sh | 2 +- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ .../02294_floating_point_second_in_settings.sh | 6 +++--- tests/queries/0_stateless/02343_aggregation_pipeline.sql | 8 +++----- .../0_stateless/02697_stop_reading_on_first_cancel.sh | 2 +- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 4 +++- .../02896_max_execution_time_with_break_overflow_mode.sql | 2 ++ tests/queries/0_stateless/02915_sleep_large_uint.sql | 1 + ...p_virtual_columns_with_non_deterministic_functions.sql | 1 + 21 files changed, 32 insertions(+), 25 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 112f5edae36..88a05f59111 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,10 +1,10 @@ -- Tags: distributed -SET max_memory_usage = 300000000; -SET max_bytes_before_external_sort = 20000000; +SET max_memory_usage = 150000000; +SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; -CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 10000000; +CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index d41d6409315..bb77a88820a 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -74,7 +74,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" echo "A session cannot be used by concurrent connections:" -${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9&max_rows_to_read=0" --data-binary "SELECT count() FROM system.numbers" >/dev/null & # An infinite loop is required to make the test reliable. 
We will ensure that at least once the query on the line above has started before this check while true diff --git a/tests/queries/0_stateless/00601_kill_running_query.sh b/tests/queries/0_stateless/00601_kill_running_query.sh index 3163f8146d0..be0fff49129 100755 --- a/tests/queries/0_stateless/00601_kill_running_query.sh +++ b/tests/queries/0_stateless/00601_kill_running_query.sh @@ -11,7 +11,7 @@ function wait_for_query_to_start() while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } -${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k)' > /dev/null & +${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0' > /dev/null & wait_for_query_to_start "test_00601_$CLICKHOUSE_DATABASE" $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_00601_$CLICKHOUSE_DATABASE'" wait diff --git a/tests/queries/0_stateless/00976_max_execution_speed.sql b/tests/queries/0_stateless/00976_max_execution_speed.sql index 52c3f05ff43..41374712724 100644 --- a/tests/queries/0_stateless/00976_max_execution_speed.sql +++ b/tests/queries/0_stateless/00976_max_execution_speed.sql @@ -1,2 +1,2 @@ -SET max_execution_speed = 1, max_execution_time = 3; +SET max_execution_speed = 1, max_execution_time = 3, max_rows_to_read = 0; SELECT count() FROM system.numbers; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql index a53b60a5ad3..2b301d7aced 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -1,8 +1,9 @@ drop table if exists t; create table t(n int, a Int64, s String) engine = MergeTree() order by a; -set enable_positional_arguments=0; -set optimize_trivial_insert_select=1; +set enable_positional_arguments = 0; +set optimize_trivial_insert_select = 1; +set max_rows_to_read = 0; -- due to aggregate functions, optimize_trivial_insert_select will not be applied insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.sql b/tests/queries/0_stateless/01245_limit_infinite_sources.sql index 05680d86a33..69c93baf8a8 100644 --- a/tests/queries/0_stateless/01245_limit_infinite_sources.sql +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.sql @@ -9,3 +9,4 @@ FROM ) WHERE number = 1 LIMIT 1 +SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 551e11c8c8d..775b7825a16 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -14,10 +14,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test() { - timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " + timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --max_rows_to_read 0 --query " SELECT DISTINCT number % 5 FROM 
system.numbers" ||: echo -e '---' - timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " + timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10&max_rows_to_read=0" --data-binary " SELECT DISTINCT number % 5 FROM system.numbers" ||: echo -e '---' } diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 187f55697e4..9d326f16a3b 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -5,5 +5,6 @@ connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 +max_memory_usage UInt64 5000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 47fe7a9c7d9..9dd800ceb09 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) start=$SECONDS # If the memory leak exists, it will lead to OOM fairly quickly. for _ in {1..1000}; do - $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + $CLICKHOUSE_CLIENT --max_memory_usage 1G --max_rows_to_read 0 <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; # NOTE: we cannot use timeout here since this will not guarantee that the query will be executed at least once. 
# (since graceful wait of clickhouse-client had been reverted) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 1cf52c0288b..ec14f637c01 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -9,7 +9,7 @@ create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') -FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8; +FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8, max_rows_to_read=0; -- optimize table t final; diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.reference b/tests/queries/0_stateless/02021_exponential_sum_shard.reference index 8453706a05a..c28e5d7a132 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.reference +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.reference @@ -2,4 +2,3 @@ 0.009775171065493644 0.009775171065493644 0.009775171065493644 -0.009775171065493644 diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.sql b/tests/queries/0_stateless/02021_exponential_sum_shard.sql index 49fde0fe217..8e91637e41d 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.sql +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.sql @@ -3,4 +3,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(100000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(1000000)); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000000)); diff --git a/tests/queries/0_stateless/02136_kill_scalar_queries.sh b/tests/queries/0_stateless/02136_kill_scalar_queries.sh index c8691b62360..f8bd5a42756 100755 --- a/tests/queries/0_stateless/02136_kill_scalar_queries.sh +++ b/tests/queries/0_stateless/02136_kill_scalar_queries.sh @@ -10,7 +10,7 @@ function wait_for_query_to_start() } QUERY_1_ID="${CLICKHOUSE_DATABASE}_TEST02132KILL_QUERY1" -(${CLICKHOUSE_CLIENT} --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." || echo 'FAIL') & +(${CLICKHOUSE_CLIENT} --max_rows_to_read 0 --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." 
|| echo 'FAIL') & wait_for_query_to_start "${QUERY_1_ID}" ${CLICKHOUSE_CLIENT} --query="KILL QUERY WHERE query_id='${QUERY_1_ID}' SYNC" diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 14baa3fddfe..826bd483fe9 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -15,6 +15,8 @@ SELECT FROM system.numbers limit 500000)); +SET max_rows_to_read = 0; + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5 ) FROM ( SELECT studentTTest(sample, variant) as result diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh index 7a18b8fea29..27dbd3e3de6 100755 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -23,16 +23,16 @@ function check_output() { # TCP CLIENT echo "TCP CLIENT" -OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) check_output "${OUTPUT}" echo "TCP CLIENT WITH SETTINGS IN QUERY" -OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) check_output "${OUTPUT}" # HTTP CLIENT echo "HTTP CLIENT" -OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \ +OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=${MAX_TIMEOUT}&max_rows_to_read=0" -d \ "SELECT count() FROM system.numbers" || true) check_output "${OUTPUT}" diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index 0f9dbd0247d..24d54293313 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -13,11 +13,9 @@ set allow_prefetched_read_pool_for_local_filesystem = 0; -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; explain pipeline select number from remote('127.0.0.{1,2,3}', system, numbers_mt) group by number settings distributed_aggregation_memory_efficient = 1; diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh index 2be13588453..5a2cec08eca 100755 --- a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_read_with_cancel" -$CLICKHOUSE_CLIENT -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & +$CLICKHOUSE_CLIENT --max_rows_to_read 0 -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & pid=$! for _ in {0..60} diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index a34a480a078..c431686b594 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -10,7 +10,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: .sh test is used over .sql because it needs $CLICKHOUSE_DATABASE to # avoid truncation, since seems that the version of MinIO that is used on CI # too slow with this. -$CLICKHOUSE_CLIENT -nm -q " +# +# Unfortunately, the test has to buffer it in memory. +$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index ec86a66c7dd..3e131cad0f0 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; + -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git a/tests/queries/0_stateless/02915_sleep_large_uint.sql b/tests/queries/0_stateless/02915_sleep_large_uint.sql index f7c04ab6d1f..08b6c580a28 100644 --- a/tests/queries/0_stateless/02915_sleep_large_uint.sql +++ b/tests/queries/0_stateless/02915_sleep_large_uint.sql @@ -1,6 +1,7 @@ SELECT sleep(3.40282e+44); -- { serverError BAD_ARGUMENTS } SELECT sleep((pow(2, 64) / 1000000) - 1); -- { serverError BAD_ARGUMENTS } SELECT sleepEachRow(184467440737095516) from numbers(10000); -- { serverError BAD_ARGUMENTS } +SET max_rows_to_read = 0; SELECT sleepEachRow(pow(2, 31)) from numbers(9007199254740992) settings function_sleep_max_microseconds_per_block = 8589934592000000000; -- { serverError TOO_SLOW } -- Another corner case, but it requires lots of memory to run (huge block size) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 8ccc3cf61da..6ef8c5a8656 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); select ignore(number) from test where RAND() > 4292390314 limit 10; From 016888e29a828870d5cdb50a0eb5e1514bafc97c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:32:20 +0200 Subject: [PATCH 
043/363] Adjust some tests

---
 tests/queries/1_stateful/00067_union_all.sql | 3 ++-
 .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +-
 .../queries/1_stateful/00147_global_in_aggregate_function.sql | 1 +
 .../queries/1_stateful/00149_quantiles_timing_distributed.sql | 1 +
 tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 1 +
 .../1_stateful/00171_grouping_aggregated_transform_bug.sql | 1 +
 .../1_stateful/00182_simple_squashing_transform_bug.sql | 1 +
 7 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/queries/1_stateful/00067_union_all.sql b/tests/queries/1_stateful/00067_union_all.sql
index 2a1d00e975d..9ee14b36b03 100644
--- a/tests/queries/1_stateful/00067_union_all.sql
+++ b/tests/queries/1_stateful/00067_union_all.sql
@@ -10,4 +10,5 @@ UNION ALL
     ORDER BY id DESC
     LIMIT 10
 )
-ORDER BY id, event;
+ORDER BY id, event
+SETTINGS max_rows_to_read = 40_000_000;
diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
index 52f9c46997f..443808e7bed 100644
--- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
+++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
@@ -1,4 +1,4 @@
 -- Tags: shard
-SET output_format_write_statistics = 0;
+SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000;
 SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact;
diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
index 075c01530c6..c156f073573 100644
--- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
+++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
@@ -1,4 +1,5 @@
 -- Tags: global
+SET max_rows_to_read = 40_000_000;
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits);
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits);
diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
index 6f910646fb7..16b565985ea 100644
--- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
+++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
@@ -1,4 +1,5 @@
 -- Tags: distributed
+SET max_rows_to_read = 100_000_000;
 SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID);
 SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1;
diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
index 7b3f50f8141..1a98a531067 100644
--- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
+++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
@@ -1,5 +1,6 @@
 -- Tags: no-random-settings

+SET max_memory_usage = '10G';
 SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;

 -- We had a bug which lead to additional compressed data read. 
test.hits compressed size is about 1.2Gb, but we read more then 3Gb. diff --git a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql index 7068780a1b1..b3e4d749328 100644 --- a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql +++ b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = '100M'; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS max_block_size = 63169; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1, max_block_size = 63169; diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index e73de4b33fb..85bad651090 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,6 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; +set max_rows_to_read = '20M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From e569a305ba07225ee641ae4af07ba9c88e4608d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:46:31 +0200 Subject: [PATCH 044/363] Adjust some tests --- .../00002_log_and_exception_messages_formatting.sql | 2 ++ .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 2 +- .../00377_shard_group_uniq_array_of_string_array.sql | 2 +- tests/queries/0_stateless/00600_replace_running_query.sh | 6 +++--- .../00834_cancel_http_readonly_queries_on_client_close.sh | 2 +- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01304_direct_io_long.sh | 4 ++-- tests/queries/0_stateless/02021_exponential_sum.reference | 1 - tests/queries/0_stateless/02021_exponential_sum.sql | 1 - tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- .../0_stateless/02450_kill_distributed_query_deadlock.sh | 2 +- tests/queries/0_stateless/02585_query_status_deadlock.sh | 3 +-- tests/queries/0_stateless/02786_max_execution_time_leaf.sql | 1 + .../0_stateless/02844_subquery_timeout_with_break.sql | 2 +- tests/queries/0_stateless/02916_glogal_in_cancel.sql | 2 +- 16 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 07c42d6d039..c158406c0da 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -7,6 +7,8 @@ system flush logs; drop table if exists logs; create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time; +SET max_rows_to_read = 0; + -- 
Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 WITH 0.001 AS threshold diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8a310cb8fc9..f32a64cd30f 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index abd0e6e6a45..43066880102 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index e9cfff211f8..1c4376ad577 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 7a71d17f19b..e7022875086 100755 --- 
a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -17,7 +17,7 @@ function wait_for_query_to_start() } -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1&max_rows_to_read=0" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & wait_for_query_to_start 'hello' # Replace it @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -38,7 +38,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'S $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait -${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT} --query_id=42 --max_rows_to_read=0 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait diff --git a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh index 5c21c70e06a..dd3735f27b1 100755 --- a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh +++ b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' +${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&max_rows_to_read=0&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' i=0 retries=300 while [[ $i -lt $retries ]]; do diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 55eacd0db44..15a53841761 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,5 @@ drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000); +insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 2e27c2f7728..a66239058ab 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery " DROP TABLE IF EXISTS bug; CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi', merge_max_block_size = 8192; @@ -18,5 +18,5 @@ cat "$LOG" | grep Loaded rm "$LOG" -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --multiquery " DROP TABLE bug;" diff --git a/tests/queries/0_stateless/02021_exponential_sum.reference b/tests/queries/0_stateless/02021_exponential_sum.reference index 5bd77479cf7..c9dcee51173 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.reference +++ b/tests/queries/0_stateless/02021_exponential_sum.reference @@ -5,4 +5,3 @@ 0.0009775171065493646 0.0009775171065493646 0.0009775171065493646 -0.0009775171065493646 diff --git a/tests/queries/0_stateless/02021_exponential_sum.sql b/tests/queries/0_stateless/02021_exponential_sum.sql index 8ab7638029c..62ec7dcf9f1 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.sql +++ b/tests/queries/0_stateless/02021_exponential_sum.sql @@ -6,4 +6,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(1000000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(10000000); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000000); diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql 
b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 28f1afff57f..51e953da905 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -67,7 +67,7 @@ SELECT toIPv6('::.1.2.3'); --{serverError CANNOT_PARSE_IPV6} SELECT toIPv6OrDefault('::.1.2.3'); SELECT toIPv6OrNull('::.1.2.3'); -SELECT count() FROM numbers_mt(100000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); +SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); SELECT '--'; diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 0cd520d8d5d..445f907bcc5 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # this can trigger a hung/deadlock in ProcessorList. for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" if [[ "$killed_queries" -ge 1 ]]; then diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index e3e34109cdb..6321ac0064a 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" -$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " create temporary table tmp as select * from numbers(500000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & @@ -23,4 +23,3 @@ do done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null - diff --git a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql index f678c913b46..2e4623f4ac6 100644 --- a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql +++ b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) SETTINGS max_execution_time_leaf = 1; -- { serverError TIMEOUT_EXCEEDED } -- Can return partial result SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) FORMAT Null SETTINGS max_execution_time_leaf = 1, timeout_overflow_mode_leaf = 'break'; diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql index 511ed0c59de..00b527a9378 100644 --- a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -4,7 +4,7 @@ CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filte INSERT INTO t SELECT 
number, rand()%1000 FROM numbers(10000); SET timeout_overflow_mode='break'; -SET max_execution_time=0.1; +SET max_execution_time=0.1, max_rows_to_read=0; SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000)); DROP TABLE t; diff --git a/tests/queries/0_stateless/02916_glogal_in_cancel.sql b/tests/queries/0_stateless/02916_glogal_in_cancel.sql index ad54f1ecdec..dd61795947a 100644 --- a/tests/queries/0_stateless/02916_glogal_in_cancel.sql +++ b/tests/queries/0_stateless/02916_glogal_in_cancel.sql @@ -1,2 +1,2 @@ -set max_execution_time = 0.5, timeout_overflow_mode = 'break'; +set max_execution_time = 0.5, timeout_overflow_mode = 'break', max_rows_to_read = 0; SELECT number FROM remote('127.0.0.{3|2}', numbers(1)) WHERE number GLOBAL IN (SELECT number FROM numbers(10000000000.)) format Null; From 26650dcb2e39b0d28cae4029b6adde2b6c01fda2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:59:13 +0200 Subject: [PATCH 045/363] More limits --- tests/config/install.sh | 2 +- tests/config/users.d/limits.xml | 8 ----- tests/config/users.d/limits.yaml | 57 ++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 9 deletions(-) delete mode 100644 tests/config/users.d/limits.xml create mode 100644 tests/config/users.d/limits.yaml diff --git a/tests/config/install.sh b/tests/config/install.sh index 265b9248f4a..c5fb3cc92c7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -93,7 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/ -ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/limits.yaml $DEST_SERVER_PATH/users.d/ if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml deleted file mode 100644 index f44c73241ab..00000000000 --- a/tests/config/users.d/limits.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - 5G - 20000000 - - - diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml new file mode 100644 index 00000000000..4f3f439a997 --- /dev/null +++ b/tests/config/users.d/limits.yaml @@ -0,0 +1,57 @@ +profiles: + default: + max_memory_usage: 5G + max_rows_to_read: 20000000 + + # Also set every other limit to a high value, so it will not limit anything, but we will test that code around it. 
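+    # With these defaults in place, any test that needs to read more than
+    # 20 million rows has to opt out explicitly, for example with
+    # `SETTINGS max_rows_to_read = 0` or a higher per-query value, which is
+    # what the test adjustments in the surrounding commits do.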
+ s3_max_get_rps: 1000000 + s3_max_get_burst: 2000000 + s3_max_put_rps: 1000000 + s3_max_put_burst: 2000000 + max_remote_read_network_bandwidth: 1T + max_remote_write_network_bandwidth: 1T + max_local_read_bandwidth: 1T + max_local_write_bandwidth: 1T + use_index_for_in_with_subqueries_max_values: 1G + max_bytes_to_read: 1T + max_bytes_to_read_leaf: 1T + max_rows_to_group_by: 10G + max_bytes_before_external_group_by: 10G + max_rows_to_sort: 10G + max_bytes_to_sort: 10G + max_bytes_before_external_sort: 10G + max_result_rows: 1G + max_result_bytes: 1G + max_execution_time: 600 + max_execution_time_leaf: 600 + max_execution_speed: 100G + max_execution_speed_bytes: 10T + max_estimated_execution_time: 600 + max_columns_to_read: 10K + max_temporary_columns: 10K + max_temporary_non_const_columns: 10K + max_sessions_for_user: 1K + max_rows_in_set: 10G + max_bytes_in_set: 10G + max_rows_in_join: 10G + max_bytes_in_join: 10G + max_rows_in_set_to_optimize_join: 1G + max_rows_to_transfer: 1G + max_bytes_to_transfer: 1G + max_rows_in_distinct: 10G + max_bytes_in_distinct: 10G + max_memory_usage_for_user: 10G + max_network_bandwidth: 100G + max_network_bytes: 1T + max_network_bandwidth_for_user: 100G + max_network_bandwidth_for_all_users: 100G + max_temporary_data_on_disk_size_for_user: 100G + max_temporary_data_on_disk_size_for_query: 100G + max_backup_bandwidth: 100G + max_hyperscan_regexp_length: 1M + max_hyperscan_regexp_total_length: 10M + query_cache_max_size_in_bytes: 10M + query_cache_max_entries: 100K + external_storage_max_read_rows: 10G + external_storage_max_read_bytes: 10G + max_streams_for_merge_tree_reading: 1000 From fd3f0cf92b7800b171c5723541a329748a0dad1b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:17:58 +0800 Subject: [PATCH 046/363] support overlayUTF8 --- src/Functions/FunctionOverlay.cpp | 281 +++++++++++++----- .../0_stateless/03205_overlay.reference | 168 +++++++++++ tests/queries/0_stateless/03205_overlay.sql | 60 ++++ .../0_stateless/03206_overlay_utf8.reference | 168 +++++++++++ .../0_stateless/03206_overlay_utf8.sql | 60 ++++ 5 files changed, 665 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/03205_overlay.reference create mode 100644 tests/queries/0_stateless/03205_overlay.sql create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index d3ee7e1df6d..61d2df88ab1 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB { @@ -15,6 +17,8 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +using namespace GatherUtils; + namespace { @@ -24,7 +28,7 @@ template class FunctionOverlay : public IFunction { public: - static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay"; + static constexpr auto name = is_utf8 ? 
"overlayUTF8" : "overlay"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -238,6 +242,15 @@ private: } } + /// get character count of a slice [data, data+bytes) + static size_t getSliceSize(const UInt8 * data, size_t bytes) + { + if constexpr (is_utf8) + return UTF8::countCodePoints(data, bytes); + else + return bytes; + } + template void constantConstant( size_t rows, @@ -257,13 +270,12 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); - size_t replace_size = replace.size; - Int64 length = 0; // maybe negative + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) { @@ -276,6 +288,9 @@ private: } Int64 offset = 0; // start from 1, maybe negative + Int64 length = 0; // maybe negative + const UInt8 * input_begin = reinterpret_cast(input.data); + const UInt8 * input_end = reinterpret_cast(input.data + input.size); size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -293,28 +308,57 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; + + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -338,7 +382,7 @@ private: return; } - size_t replace_size = replace.size; + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -358,7 +402,8 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); if constexpr (offset_is_const) { @@ -378,29 +423,59 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
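+                /// In this non-UTF-8 branch, prefix_size, valid_length and suffix_size
+                /// are byte counts, so the suffix is copied directly from the input bytes.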
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -424,7 +499,7 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); @@ -438,12 +513,15 @@ private: } size_t rows = replace_offsets.size(); + const auto * input_begin = reinterpret_cast(input.data); + const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (!offset_is_const) { @@ -463,28 +541,55 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
- if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input.size : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -533,9 +638,12 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); + size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (offset_is_const) { @@ -559,29 +667,58 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
- if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. 
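+                    /// Here suffix_bytes is the byte length of the trailing suffix_size
+                    /// code points, as computed by skipCodePointsBackward above.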
+ if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference new file mode 100644 index 00000000000..9e79db2e131 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql new file mode 100644 index 00000000000..b131312c934 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -0,0 +1,60 @@ +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) 
from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', '_', 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); +SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); +SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); 
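+-- Each 'tructured' variant here replaces length=4 bytes starting at 1-based
+-- position 2 ('park') with 'tructured', so all of them should produce
+-- 'Structured SQL' (see 03205_overlay.reference).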
+SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference new file mode 100644 index 00000000000..19878c97184 --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH 
+Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql new file mode 100644 index 00000000000..00b756c8b5b --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.sql @@ -0,0 +1,60 @@ +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from 
numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From c09c22b17575396e38fb45cb385dcc8a49f9a183 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:45:47 +0800 Subject: [PATCH 047/363] finish doc --- .../functions/string-replace-functions.md | 72 +++++++++++++++++++ ...new_functions_must_be_documented.reference | 2 + 2 files changed, 74 insertions(+) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 8793ebdd1a3..4e1f89fd974 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -223,3 +223,75 @@ SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res; │ Munchener Strase │ └──────────────────┘ ``` + +## overlay + +Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. + +**Syntax** + +```sql +overlay(s, replace, position[, length]) +``` + +**Parameters** + +- `s`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `position`: An integer type [Int](../data-types/int.md). +- `length`: Optional. An integer type [Int](../data-types/int.md). + +**Returned value** + +- A [String](../data-types/string.md) data type value. 
+
+## overlayUTF8
+
+Replaces part of the string `s` with the string `replace`, starting at the 1-based `position`, for `length` UTF-8 characters. If `length` is omitted or negative, it defaults to the length of `replace`.
+
+Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+**Syntax**
+
+```sql
+overlayUTF8(s, replace, position[, length])
+```
+
+**Parameters**
+
+- `s`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `position`: An integer type [Int](../data-types/int-uint.md).
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md).
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value. If `position` is negative, it is counted from the end of the string. `length` specifies the length of the snippet within the input to be replaced.
+
+**Example**
+
+```sql
+SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res;
+```
+
+Result:
+
+```text
+┌─res────────────────────────┐
+│ ClickHouse是开源OLAP数据库 │
+└────────────────────────────┘
+```
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index a152066a460..ba9d3fb7a83 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -512,6 +512,8 @@ nullIf
 nullIn
 nullInIgnoreSet
 or
+overlay
+overlayUTF8
 parseDateTime
 parseDateTime32BestEffort
 parseDateTime32BestEffortOrNull

From 63e586d17af1e8a92fc4d2e8af71f1dba4996fea Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 24 Jul 2024 09:57:56 +0200
Subject: [PATCH 048/363] Adjust tests

---
 tests/config/users.d/limits.yaml              |  6 +--
 ..._shard_external_sort_distributed.reference | 40 +++++++++----------
 .../00111_shard_external_sort_distributed.sql |  2 +-
 ...00375_shard_group_uniq_array_of_string.sql |  2 +-
 ...76_shard_group_uniq_array_of_int_array.sql |  2 +-
 ...shard_group_uniq_array_of_string_array.sql |  2 +-
 .../00600_replace_running_query.sh            |  4 +-
 .../00601_kill_running_query.reference        |  2 +-
 .../00906_low_cardinality_cache.sql           |  3 +-
 .../01091_query_profiler_does_not_hang.sql    |  2 +-
 .../0_stateless/01293_show_settings.reference |  1 +
 .../0_stateless/01485_256_bit_multiply.sql    |  2 +
 .../01603_read_with_backoff_bug.sql           |  1 +
 .../01961_roaring_memory_tracking.sql         |  2 +-
 .../02003_memory_limit_in_client.sh           |  6 +--
 .../02161_addressToLineWithInlines.sql        |  2 +-
 .../02226_analyzer_or_like_combine.sql        |  2 +
 .../02234_cast_to_ip_address.reference        |  8 ++--
 .../02343_aggregation_pipeline.reference      |  6 +--
 .../02353_simdjson_buffer_overflow.sql        |  1 +
 .../0_stateless/02372_now_in_block.sql        |  1 +
 .../02536_delta_gorilla_corruption.sql        |  2 +-
 22 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml
index 4f3f439a997..1c9fff9f4c8 100644
--- a/tests/config/users.d/limits.yaml
+++ b/tests/config/users.d/limits.yaml
@@ -27,9 +27,9 @@ profiles:
       max_execution_speed: 100G
       max_execution_speed_bytes: 10T
max_estimated_execution_time: 600 - max_columns_to_read: 10K - max_temporary_columns: 10K - max_temporary_non_const_columns: 10K + max_columns_to_read: 20K + max_temporary_columns: 20K + max_temporary_non_const_columns: 20K max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference index df5aa77af60..7534c12a0d8 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference @@ -1,20 +1,20 @@ -7040546 -7040546 -4327029 -4327029 -1613512 -1613512 -8947307 -8947307 -6233790 -6233790 -3520273 -3520273 -806756 -806756 -8140551 -8140551 -5427034 -5427034 -2713517 -2713517 +4437158 +4437158 +1723641 +1723641 +3630402 +3630402 +916885 +916885 +2823646 +2823646 +110129 +110129 +4730407 +4730407 +2016890 +2016890 +3923651 +3923651 +1210134 +1210134 diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 88a05f59111..ef9c0f9f9d0 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -6,6 +6,6 @@ SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; -SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; +SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 4999980, 20; DROP TABLE numbers10m; diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index f32a64cd30f..445ffe66f64 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 43066880102..7593e1e1580 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int 
GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 1c4376ad577..8b48ee673f3 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index e7022875086..8f21443d589 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) TEST_PREFIX=$RANDOM ${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" -${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1, max_rows_to_read=0" ${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id diff --git a/tests/queries/0_stateless/00601_kill_running_query.reference b/tests/queries/0_stateless/00601_kill_running_query.reference index 3917ff89482..7824d5804bc 100644 --- a/tests/queries/0_stateless/00601_kill_running_query.reference +++ b/tests/queries/0_stateless/00601_kill_running_query.reference @@ -1 +1 @@ -waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) +waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0 diff --git 
a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 15a53841761..efd96746dc4 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,6 @@ +SET max_rows_to_read = '100M' drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; +insert into lc_00906 select '0123456789' from numbers(100000000); select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql index 21a84bdd691..45f1a00ae23 100644 --- a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql +++ b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql @@ -1,4 +1,4 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -SET query_profiler_cpu_time_period_ns = 1; +SET query_profiler_cpu_time_period_ns = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(1000000000); diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 9d326f16a3b..8b383813c9f 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -6,5 +6,6 @@ external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_memory_usage UInt64 5000000000 +max_memory_usage_for_user UInt64 10000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 5c8c47c9127..18be2b11599 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,5 +1,7 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug +SET max_rows_to_read = '100M' + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index ec14f637c01..b68d15a2200 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -3,6 +3,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; +set max_rows_to_read = '30M'; drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 485c8192f69..79c722bd629 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M'; +SET max_memory_usage = '100M', max_rows_to_read = '1B'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) 
GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh index 96028f4847a..94eba8f25be 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.sh +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -4,11 +4,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index cf400ed34c5..d7ce133f38c 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -6,7 +6,7 @@ SELECT addressToLineWithInlines(1); -- { serverError FUNCTION_NOT_ALLOWED } SET allow_introspection_functions = 1; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; -SET log_queries = 1; +SET log_queries = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; SET log_queries = 0; SET query_profiler_cpu_time_period_ns = 0; diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql index fbebfc6d281..1cd6b8a4e4d 100644 --- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql +++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql @@ -1,3 +1,5 @@ +SET allow_hyperscan = 1, max_hyperscan_regexp_length = 0, max_hyperscan_regexp_total_length = 0; + EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0; EXPLAIN QUERY TREE run_passes=1 SELECT 
materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index fa9c6bd0f94..3dd306477b9 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 -- :: \N @@ -37,7 +37,7 @@ IPv6 functions ::ffff:127.0.0.1 :: \N -100000000 +20000000 -- ::ffff:127.0.0.1 -- diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index bf61eb6da0a..eb013200a17 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -1,6 +1,6 @@ -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -16,7 +16,7 @@ ExpressionTransform × 16 ExpressionTransform (ReadFromSystemNumbers) NumbersRange 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -32,7 +32,7 @@ ExpressionTransform × 16 ExpressionTransform × 16 (ReadFromSystemNumbers) NumbersRange × 16 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; (Expression) ExpressionTransform (Sorting) diff --git a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql index b324f834053..e7c6c272102 100644 --- a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql +++ b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql @@ -2,5 +2,6 @@ SET max_execution_time = 3; SET timeout_overflow_mode = 'break'; +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM system.numbers_mt WHERE NOT ignore(JSONExtract('{' || repeat('"a":"b",', rand() % 10) || '"c":"d"}', 'a', 'String')) FORMAT Null; diff --git a/tests/queries/0_stateless/02372_now_in_block.sql b/tests/queries/0_stateless/02372_now_in_block.sql index aee4572ce8d..d0aec471801 100644 --- a/tests/queries/0_stateless/02372_now_in_block.sql +++ b/tests/queries/0_stateless/02372_now_in_block.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM (SELECT DISTINCT nowInBlock(), nowInBlock('Pacific/Pitcairn') FROM system.numbers LIMIT 2); SELECT nowInBlock(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT nowInBlock(NULL) IS NULL; diff --git 
a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql index a4e0965e329..3accc726d08 100644 --- a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -12,7 +12,7 @@ create table bug_delta_gorilla (value_bug UInt64 codec (Delta, Gorilla)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' -as (select 0 from numbers(30000000)); +as (select 0 from numbers(20000000)); select count(*) from bug_delta_gorilla From c2238c57231fe86883eb9b9a7042a72d28d31eae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:45:33 +0200 Subject: [PATCH 049/363] Fix tests --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 4 +++- .../00377_shard_group_uniq_array_of_string_array.sql | 3 ++- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01485_256_bit_multiply.sql | 2 +- tests/queries/0_stateless/01961_roaring_memory_tracking.sql | 2 +- .../queries/0_stateless/02234_cast_to_ip_address.reference | 6 +++--- tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 445ffe66f64..8db91904a6a 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '100M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 7593e1e1580..24b7f1c30a6 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,12 +1,14 @@ -- Tags: shard +SET max_rows_to_read = '55M'; + DROP TABLE IF EXISTS group_uniq_arr_int; CREATE TABLE group_uniq_arr_int ENGINE = Memory AS SELECT g as id, if(c == 0, [v], if(c == 1, emptyArrayInt64(), [v, v])) as v FROM (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id 
ORDER BY id; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 8b48ee673f3..180a6a04861 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,5 @@ -- Tags: shard +SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; CREATE TABLE group_uniq_arr_str ENGINE = Memory AS @@ -6,6 +7,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index efd96746dc4..9c1abe1b6df 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,4 +1,4 @@ -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 18be2b11599..a4e99d51970 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,6 +1,6 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; select count() from ( diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 79c722bd629..22eb8e887f2 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M', max_rows_to_read = '1B'; +SET max_memory_usage = '100M', max_rows_to_read = '1G'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index 3dd306477b9..b9f0a49ec4d 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 
+\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 -- :: \N diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 51e953da905..c851cfde927 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -71,7 +71,7 @@ SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomStr SELECT '--'; -SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} +SELECT cast('test' , 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } SELECT cast('::ffff:127.0.0.1', 'IPv6'); SELECT '--'; From 2106d4769ac4fca6604dec3b66831aef4b12e943 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:53:28 +0200 Subject: [PATCH 050/363] Fix tests --- tests/config/users.d/limits.yaml | 1 - tests/queries/0_stateless/02346_additional_filters.sql | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 1c9fff9f4c8..23aaccf9298 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -30,7 +30,6 @@ profiles: max_columns_to_read: 20K max_temporary_columns: 20K max_temporary_non_const_columns: 20K - max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G max_rows_in_join: 10G diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index f6b665713ec..5a799e1c8c1 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -4,6 +4,8 @@ drop table if exists table_2; drop table if exists v_numbers; drop table if exists mv_table; +SET max_rows_to_read = 0; + create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); From 878a340317863e94aec61476e105342aef997c7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 11:47:07 +0200 Subject: [PATCH 051/363] Fix tests --- docker/test/stateful/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 304bfd7b533..5532df40fdf 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -200,7 +200,8 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, 
SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + # AWS S3 is very inefficient, so increase memory even further: + clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From 036485a657a35d764ad33f912d0d10b37d05e59b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 12:15:48 +0200 Subject: [PATCH 052/363] Fix error --- docker/test/stateful/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 5532df40fdf..bd2e38ff00f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, 
UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From e2c78844a005aa5c04e454df6ff7e0508c967d18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 14:53:09 +0200 Subject: [PATCH 053/363] Fix tests --- docker/test/stateful/run.sh | 2 +- tests/queries/0_stateless/02177_issue_31009.sql | 2 ++ .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +- tests/queries/1_stateful/00147_global_in_aggregate_function.sql | 2 +- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 2 +- .../queries/1_stateful/00182_simple_squashing_transform_bug.sql | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index bd2e38ff00f..fde8b8ae529 100755 --- 
a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 30G --max_memory_usage_for_user 30G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS 
enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index f25df59f4b4..5c62b5a9c2f 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ b/tests/queries/0_stateless/02177_issue_31009.sql @@ -8,6 +8,8 @@ DROP TABLE IF EXISTS right; CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +SET max_rows_to_read = '50M'; + INSERT INTO left SELECT number, toString(number) FROM numbers(25367182); INSERT INTO right SELECT number, toString(number) FROM numbers(23124707); diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql index 443808e7bed..8f18f3740e4 100644 --- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql +++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql @@ -1,4 +1,4 @@ -- Tags: shard -SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000; +SET output_format_write_statistics = 0, max_rows_to_read = 50_000_000; SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact; diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql index c156f073573..f0b249e9af4 100644 --- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql +++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql @@ -1,5 +1,5 @@ -- Tags: global -SET max_rows_to_read = 40_000_000; +SET max_rows_to_read = 100_000_000; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 1a98a531067..184a8edcbcb 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,6 +1,6 @@ -- Tags: no-random-settings -SET max_memory_usage = '10G' +SET max_memory_usage = '10G'; SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. 
diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index 85bad651090..26e112cff04 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,7 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; -set max_rows_to_read = '20M'; +set max_rows_to_read = '100M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From c837541a7783f14780a7d2535dd6fa2cbf5effd5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 10:11:53 +0800 Subject: [PATCH 054/363] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 943caf918d6..fa26cc0ff1f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2166,6 +2166,7 @@ outfile overcommit overcommitted overfitting +overlayUTF overparallelization packetpool packetsize From 6725546b312ef52675f8fbd5f41d0ef5327a3e8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 19:35:21 +0200 Subject: [PATCH 055/363] Update some tests --- .../02884_parallel_window_functions.sql | 18 ++++++++++-------- .../03143_asof_join_ddb_long.reference | 4 ++-- .../0_stateless/03143_asof_join_ddb_long.sql | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index c5ab013a198..ea1cd458c65 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug -CREATE TABLE window_funtion_threading +CREATE TABLE window_function_threading Engine = MergeTree ORDER BY (ac, nw) AS SELECT @@ -20,7 +20,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -40,7 +40,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -58,7 +58,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -66,6 +66,8 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; +SET max_rows_to_read = 30000000; + SELECT nw, sum(WR) AS R, @@ -77,7 +79,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -88,7 +90,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -99,7 +101,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 GROUP BY ac, @@ -110,7 +112,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, diff --git 
a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference index 2850a8aba98..ae7f7c805f2 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -1,2 +1,2 @@ -49999983751397 10000032 -49999983751397 10000032 +7999995751397 4000032 +7999995751397 4000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..a635fd2e86a 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -11,7 +11,7 @@ AS toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, number % 4 AS key, number AS value - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) AS @@ -33,7 +33,7 @@ AS SELECT toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, 3 AS key - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); SELECT SUM(value), COUNT(*) From a7441669aa87b1551d2211ec4c3e550aaaa86b41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 21:43:03 +0200 Subject: [PATCH 056/363] Update some tests --- ...675_profile_events_from_query_log_and_client.sh | 2 +- .../02884_parallel_window_functions.sql | 2 +- ...967_parallel_replicas_join_algo_and_analyzer.sh | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index e346d9893a7..1cf65ed8120 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | grep -v 'RequestThrottlerCount' | sort echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index ea1cd458c65..2207c90a4ee 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -66,7 +66,7 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; -SET max_rows_to_read = 30000000; +SET max_rows_to_read = 40000000; SELECT nw, diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh index 2840482da6d..8cefa873940 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh @@ -90,7 +90,7 @@ $CLICKHOUSE_CLIENT -q " 
select * from (select key, value from num_1) l inner join (select key, value from num_2) r on l.key = r.key order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -128,7 +128,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -154,7 +154,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -179,7 +179,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -205,7 +205,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, 
parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -230,7 +230,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -255,7 +255,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | From ba5a07bcc7b78673e58d0501e85b59e929215bac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 21:47:41 +0200 Subject: [PATCH 057/363] Better tests --- .../01301_aggregate_state_exception_memory_leak.reference | 2 +- .../0_stateless/01301_aggregate_state_exception_memory_leak.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference index b20e7415f52..6282bf366d0 100644 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference @@ -1,2 +1,2 @@ -Memory limit (for query) exceeded +Memory limit exceeded Ok diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 9dd800ceb09..266518d11d4 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -16,5 +16,5 @@ for _ in {1..1000}; do if [[ $elapsed -gt 30 ]]; then break fi -done 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq +done 2>&1 | grep -o -P 'Memory limit .+ exceeded' | sed -r -e 's/(Memory limit)(.+)( exceeded)/\1\3/' | uniq echo 'Ok' From f81e8aa345d64f5fbcae103f92cdc649f0d82d24 Mon Sep 17 00:00:00 2001 
From: Alexey Milovidov Date: Thu, 25 Jul 2024 22:08:32 +0200 Subject: [PATCH 058/363] Update tests --- .../queries/0_stateless/01010_pmj_right_table_memory_limits.sql | 2 ++ tests/queries/0_stateless/01304_direct_io_long.sh | 2 +- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- tests/queries/0_stateless/02151_lc_prefetch.sql | 1 + .../queries/0_stateless/02344_insert_profile_events_stress.sql | 1 + ...02354_distributed_with_external_aggregation_memory_usage.sql | 2 ++ .../02481_parquet_list_monotonically_increasing_offsets.sh | 2 +- tests/queries/0_stateless/02497_remote_disk_fat_column.sql | 2 +- .../02896_max_execution_time_with_break_overflow_mode.sql | 2 +- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 1 + 10 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index a090be85221..b8f2596f3d5 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest, no-random-settings +SET max_bytes_in_join = 0; +SET max_rows_in_join = 0; SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index a66239058ab..35d1440bcb5 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" LOG="$CLICKHOUSE_TMP/err-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" +$CLICKHOUSE_BENCHMARK --max_rows_to_read 51M --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" cat "$LOG" | grep Exception cat "$LOG" | grep Loaded diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6625ad916e8..6172afbc699 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
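For readers new to the harness, the `-- { serverError MEMORY_LIMIT_EXCEEDED }` tail in the hunk below is the stateless-test convention for "this statement must fail with exactly this error"; the change itself only appends max_rows_to_read=0, presumably so the harness-wide row cap cannot fire before the memory check the test is asserting on. A toy example of the same annotation style, built on an always-throwing query chosen for illustration:

SELECT throwIf(number = 5, 'boom') FROM numbers(10); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }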
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql index c2b97231145..f8c76038120 100644 --- a/tests/queries/0_stateless/02151_lc_prefetch.sql +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -3,5 +3,6 @@ drop table if exists tab_lc; CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into tab_lc select number, toString(number % 10) from numbers(20000000); optimize table tab_lc; +SET max_rows_to_read = '21M'; select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; drop table if exists tab_lc; diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index e9a790bea5d..902e1da543c 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan +SET max_rows_to_read = 0; create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. 
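A recurring pattern across these patches: max_rows_to_read caps how many rows a query may pull from storage, 0 disables the check entirely, and string shorthands such as '21M' or '51M' expand to millions of rows. A hedged sketch of the three states, assuming the default read_overflow_mode = 'throw':

SET max_rows_to_read = 1000000;
SELECT count() FROM numbers(2000000); -- fails with TOO_MANY_ROWS: 2M rows against a 1M cap
SET max_rows_to_read = '3M';          -- string shorthand for 3000000
SELECT count() FROM numbers(2000000); -- passes: 2M rows under the 3M cap
SET max_rows_to_read = 0;             -- 0 means "unlimited", as the hunks above use it
SELECT count() FROM numbers(2000000);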
diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 105fb500461..82eb4c93e3d 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage +SET max_rows_to_read = '51M'; + DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 55e6ac2f758..3e28e76d6da 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -11,7 +11,7 @@ echo "Parquet" DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" diff --git a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql index d97109b66f3..65519296602 100644 --- a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql +++ b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql @@ -2,7 +2,7 @@ set allow_suspicious_fixed_string_types=1; create table fat_granularity (x UInt32, fat FixedString(160000)) engine = MergeTree order by x settings storage_policy = 's3_cache'; -insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 8192, max_insert_threads=8; +insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 3000, max_insert_threads = 8, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; -- Too large sizes of FixedString to deserialize select x from fat_granularity prewhere fat like '256\_%' settings max_threads=2; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index 3e131cad0f0..ecaad62b35a 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,6 +1,6 @@ -- Tags: no-fasttest -SET max_rows_to_read = 0; +SET max_rows_to_read = 0, max_execution_time = 0, max_estimated_execution_time = 0; -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git 
a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index a635fd2e86a..18d98dbdfe4 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -35,6 +35,7 @@ AS 3 AS key FROM numbers(0, 4000000); +SET max_rows_to_read = 0; SELECT SUM(value), COUNT(*) FROM skewed_probe From 9c7078bcf7edfc79dbea2cf6e065aa594810ccaf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 02:54:11 +0200 Subject: [PATCH 059/363] Update tests --- tests/queries/0_stateless/00974_query_profiler.sql | 1 + ...0_distributed_group_by_no_merge_order_by_long.sql | 3 ++- .../0_stateless/02122_join_group_by_timeout.sh | 8 ++++---- ...ibuted_with_external_aggregation_memory_usage.sql | 2 +- ..._parquet_list_monotonically_increasing_offsets.sh | 2 +- .../02884_parallel_window_functions.reference | 12 ++++++------ 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 24e4241b813..71ea14c3d64 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -15,6 +15,7 @@ SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FRO SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; SET log_queries = 1; +SET max_rows_to_read = 0; SELECT count(), ignore('test cpu time query profiler') FROM numbers_mt(10000000000); SET log_queries = 0; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6172afbc699..e980f367de7 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,8 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
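The hunk below reworks the previous patch's fix for this file: instead of tacking max_rows_to_read=0 onto the statement's own settings clause, it hoists the override into a session-level SET so the remaining statements in the file inherit it too. For per-query settings like this one the two spellings are equivalent for a single statement; a minimal sketch of the difference, with an illustrative row count:

-- per-statement: the override lives and dies with this one query
SELECT count() FROM numbers(1e6) SETTINGS max_rows_to_read = 0;
-- session-level: every statement that follows inherits the override
SET max_rows_to_read = 0;
SELECT count() FROM numbers(1e6);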
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED } +SET max_rows_to_read = 0; +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 59719f75d7c..79c4f01c98a 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -14,7 +14,7 @@ MAX_PROCESS_WAIT=5 # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --max_execution_time 1 -q \ "SELECT * FROM ( SELECT a.name as n @@ -31,7 +31,7 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 -q \ "SELECT a.name as n FROM ( @@ -48,7 +48,7 @@ echo $? 
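Both client blocks in this script exercise the same contrast: with the default timeout_overflow_mode = 'throw' the server aborts with Code: 159 (TIMEOUT_EXCEEDED) once max_execution_time elapses, while 'break' — the mode the 02896 test above relies on — stops reading and returns whatever was produced in time. A self-contained sketch mirroring the 02896 pattern, where max_block_size = 1 only serves to throttle the scan so the one-second budget reliably expires:

-- default 'throw': the query fails once the budget is spent
SELECT count() FROM numbers(100000000) SETTINGS max_block_size = 1, max_execution_time = 1; -- { serverError TIMEOUT_EXCEEDED }
-- 'break': the query stops early and returns a partial count instead of an error
SELECT count() FROM numbers(100000000) SETTINGS max_block_size = 1, max_execution_time = 1, timeout_overflow_mode = 'break';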
# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0&max_execution_time=1" -d \ "SELECT * FROM ( SELECT a.name as n @@ -66,7 +66,7 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d \ "SELECT a.name as n FROM ( diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 82eb4c93e3d..5eea6f149b5 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage -SET max_rows_to_read = '51M'; +SET max_rows_to_read = '101M'; DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 3e28e76d6da..2f512697868 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -12,6 +12,6 @@ DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +${CLICKHOUSE_CLIENT} --max_result_rows 0 --max_result_bytes 0 --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index bac15838dc2..a2cc96dda74 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -12,7 +12,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -32,7 +32,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -53,7 +53,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -64,7 +64,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -75,7 +75,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 
GROUP BY ac, @@ -86,7 +86,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, From a03f12fe76969bc2002bc76ce04f7097cb02295b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:17:17 +0200 Subject: [PATCH 060/363] Fix some tests --- ...2354_distributed_with_external_aggregation_memory_usage.sql | 2 +- .../0_stateless/02884_parallel_window_functions.reference | 1 + tests/queries/0_stateless/replication.lib | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 5eea6f149b5..f9da5b3a73c 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -25,6 +25,6 @@ select a, b, c, sum(a) as s from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null -settings max_memory_usage = '5Gi'; +settings max_memory_usage = '5Gi', max_result_rows = 0, max_result_bytes = 0; DROP TABLE t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index a2cc96dda74..1f5346a1484 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -42,6 +42,7 @@ SETTINGS max_threads = 1; 0 2 0 1 2 0 2 2 0 +SET max_rows_to_read = 40000000; SELECT nw, sum(WR) AS R, diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 05651531fba..dcac721859e 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -114,7 +114,8 @@ function check_replication_consistency() # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q \ + # also, disable the limit that is set for tests globally + res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_rows_to_read=0 -q \ "SELECT if((countDistinct(data) as c) == 0, 1, c) FROM From 7f4eb59c42aa4f432c99fa7e4f8240056fa26b91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:33:40 +0200 Subject: [PATCH 061/363] Fix some tests --- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index e980f367de7..805e0b4fedb 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
-SET max_rows_to_read = 0; +SET max_rows_to_read = 0, max_result_rows = 0; select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, From 80b925ec75983ea558be536d657bfd2d277f1fbc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:47:58 +0200 Subject: [PATCH 062/363] Update test --- ...arallel_replicas_join_algo_and_analyzer.sh | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh index 8cefa873940..2c5b5a2a07b 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh @@ -86,11 +86,11 @@ SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2) r on l.key = r.key order by l.key limit 10 offset 700000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -123,12 +123,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -149,12 +149,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, 
max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -174,12 +174,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -200,12 +200,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -225,12 +225,12 @@ SETTINGS allow_experimental_analyzer=1, 
parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -250,12 +250,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | From 342bbb53c65d32698d55649252e6bc29f2a557b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:49:36 +0200 Subject: [PATCH 063/363] It was an optimization, not a limit --- tests/config/users.d/limits.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 23aaccf9298..63c4e884a9d 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -34,7 +34,6 @@ profiles: max_bytes_in_set: 10G max_rows_in_join: 10G max_bytes_in_join: 10G - max_rows_in_set_to_optimize_join: 1G max_rows_to_transfer: 1G max_bytes_to_transfer: 1G max_rows_in_distinct: 10G From fd5934d0ad8954a263554ce48402849deafa5341 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 01:28:38 +0200 Subject: [PATCH 064/363] Update test --- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql 
b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 805e0b4fedb..6eb55839f5e 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,6 +1,7 @@ -- Tags: long, distributed, no-random-settings drop table if exists data_01730; +SET max_rows_to_read = 0, max_result_rows = 0; -- does not use 127.1 due to prefer_localhost_replica @@ -12,7 +13,6 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. -SET max_rows_to_read = 0, max_result_rows = 0; select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, From 16e84d1e3678b56d0b32dff1377b6daadf870dd2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 02:37:54 +0200 Subject: [PATCH 065/363] Update test --- tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index d11c9120c61..bc5553ad227 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,7 +1,7 @@ -set allow_suspicious_low_cardinality_types=1; +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M'; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -16,7 +16,7 @@ select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = Log; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = Log; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -31,7 +31,7 @@ select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = StripeLog; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = StripeLog; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -44,5 +44,3 @@ select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; - - From 59ce7ec1c6f478b3ac700a612fea46e7fc12756b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 04:26:46 +0200 Subject: [PATCH 066/363] Update 01651_lc_insert_tiny_log.sql --- tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql 
b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index bc5553ad227..b1d6a39d5c9 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,4 +1,4 @@ -set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M'; +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M'; drop table if exists perf_lc_num; CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog; From 9a6de84559cdf927e8747e4cd536676fd3b1c513 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 10:39:55 +0200 Subject: [PATCH 067/363] Update tests --- .../00086_concat_nary_const_with_nonconst_segfault.sql | 2 +- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql index 2f0ef648983..4b87b2af28d 100644 --- a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql +++ b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql @@ -1 +1 @@ -SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 +SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6eb55839f5e..83a26c83005 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,7 +1,7 @@ -- Tags: long, distributed, no-random-settings drop table if exists data_01730; -SET max_rows_to_read = 0, max_result_rows = 0; +SET max_rows_to_read = 0, max_result_rows = 0, max_bytes_before_external_group_by = 0; -- does not use 127.1 due to prefer_localhost_replica From 3db505a1327fc5bf96c93f2a510436402be13f3b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:53:38 +0200 Subject: [PATCH 068/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 1dda9e72084..dc3bf984cc6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -67,6 +67,7 @@ static std::initializer_list Date: Sat, 27 Jul 2024 16:10:15 +0200 Subject: [PATCH 069/363] Better limits --- tests/config/users.d/limits.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 63c4e884a9d..46cff73142c 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -38,7 +38,7 @@ profiles: max_bytes_to_transfer: 1G max_rows_in_distinct: 10G max_bytes_in_distinct: 10G - max_memory_usage_for_user: 10G + max_memory_usage_for_user: 32G max_network_bandwidth: 100G max_network_bytes: 1T max_network_bandwidth_for_user: 100G From b1eaec0d49b326df09c46ea3107e98fa083c220d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 
27 Jul 2024 23:29:52 +0200 Subject: [PATCH 070/363] Update 01293_show_settings.reference --- tests/queries/0_stateless/01293_show_settings.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 8b383813c9f..c4c3473ee18 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -6,6 +6,6 @@ external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_memory_usage UInt64 5000000000 -max_memory_usage_for_user UInt64 10000000000 +max_memory_usage_for_user UInt64 32000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 From cd1350c0f380d2c2754231c12aefb891c299b3fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 14:07:36 +0200 Subject: [PATCH 071/363] Mark some tests as long --- .../0_stateless/00111_shard_external_sort_distributed.sql | 2 +- .../00377_shard_group_uniq_array_of_string_array.sql | 2 +- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 ++ tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index ef9c0f9f9d0..93efc317bfa 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,4 +1,4 @@ --- Tags: distributed +-- Tags: distributed, long SET max_memory_usage = 150000000; SET max_bytes_before_external_sort = 10000000; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 180a6a04861..1ec91ac2396 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 9c1abe1b6df..337fba865fd 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,3 +1,5 @@ +-- Tags: long + SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index b68d15a2200..85be5082d92 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan +-- Tags: no-tsan, long -- Tag no-tsan: Too long for TSan set enable_filesystem_cache=0; From 1042fc68c2969ee4963268a97daf522d0e163ac5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 16:41:43 +0200 Subject: [PATCH 072/363] Update test --- .../queries/0_stateless/00632_get_sample_block_cache.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) 
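PATCH 069 and PATCH 070 travel together by necessity: the shared test profile raises max_memory_usage_for_user from 10 GB to 32 GB, and 01293_show_settings pins the rendered value, so its reference has to move in the same series. The row the reference asserts on can be reproduced with SHOW SETTINGS; the exact LIKE pattern here is an assumption, not copied from the test:

SHOW SETTINGS LIKE 'max_memory_usage_for_user';
-- name                        type    value
-- max_memory_usage_for_user   UInt64  32000000000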
diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index ae9b6bb7b2c..6a226c4912a 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -2,6 +2,9 @@ SET joined_subquery_requires_alias = 0; +-- We are no longer interested in the old analyzer. +SET allow_experimental_analyzer = 1; + -- This test (SELECT) without cache can take tens minutes DROP TABLE IF EXISTS dict_string; DROP TABLE IF EXISTS dict_ui64; @@ -41,8 +44,6 @@ SETTINGS index_granularity = 8192; CREATE TABLE dict_string (entityIri String) ENGINE = Memory; CREATE TABLE dict_ui64 (learnerId UInt64) ENGINE = Memory; ---SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, 
`repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM `CloM8CwMR2`) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN `CloM8CwMR2` WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY 
JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) FORMAT JSON; - SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, 
`time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM dict_string) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM 
`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN dict_string WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as 
`time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`); @@ -55,7 +56,7 @@ DROP TABLE video_views; --- Test for tsan: Ensure cache used from one thread +-- Test for tsan: Ensure cache is used from one thread SET max_threads = 32; DROP TABLE IF EXISTS sample_00632; @@ -173,7 +174,6 @@ FROM UNION ALL SELECT * FROM ( SELECT * FROM sample_00632 WHERE x > 0 ) ) GROUP BY x - --HAVING c = 1 ORDER BY x ASC ); DROP TABLE sample_00632; From 81714ce561c99710c3350a40d662966d5bb1a86a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 16:44:53 +0200 Subject: [PATCH 073/363] Make test simpler --- tests/queries/0_stateless/02585_query_status_deadlock.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index 6321ac0064a..932cf593393 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" $CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " -create temporary table tmp as select * from numbers(500000000); +create temporary table tmp as select * from numbers(100000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" @@ -19,7 +19,7 @@ do if [ -n "$res" ]; then break fi - sleep 1 + sleep 1 done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null From aec346676127abba85886a828e663ebf05cfa81e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 23:07:28 +0200 Subject: [PATCH 074/363] Update test --- tests/queries/0_stateless/01161_all_system_tables.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 739df782a39..d4a80d074dc 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -19,7 +19,7 @@ function run_selects() thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' 
|| name FROM system.tables WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher' AND name != 'coverage_log'") for t in "${tables_arr[@]}" do From e7cd07510aee8769cd31c4cbfa6a86d6198d37f5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 29 Jul 2024 13:03:21 +0200 Subject: [PATCH 075/363] Move setting to 24.8 version --- src/Core/SettingsChangesHistory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index dc3bf984cc6..41319ac7645 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,6 +57,8 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { + {"24.8", {{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, + }}, {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, @@ -66,7 +68,6 @@ static std::initializer_list Date: Mon, 29 Jul 2024 21:16:19 +0200 Subject: [PATCH 076/363] Update tests --- tests/queries/0_stateless/replication.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index dcac721859e..36309cf0331 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -89,7 +89,7 @@ function check_replication_consistency() # Touch all data to check that it's readable (and trigger PartCheckThread if needed) # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do + while ! 
$CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_result_rows 0 --max_result_bytes 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do sleep 1; num_tries=$((num_tries+1)) if [ $num_tries -eq 250 ]; then From d89019293e955452ede3e0abbe4b11ab2a3471bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 06:15:13 +0200 Subject: [PATCH 077/363] Update tests --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 4 ++-- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 2 +- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 85be5082d92..f41b336be46 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,5 +1,5 @@ --- Tags: no-tsan, long --- Tag no-tsan: Too long for TSan +-- Tags: no-tsan, no-msan, long +-- too long. set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index c431686b594..cfb38c60615 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -12,7 +12,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # too slow with this. # # Unfortunately, the test has to buffer it in memory. -$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " +$CLICKHOUSE_CLIENT --max_memory_usage 16G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 3621ff82126..a7c6c099de6 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 56ba7c5d48cfa648f1d496d55cdfd50450da0299 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 09:33:17 +0200 Subject: [PATCH 078/363] Update a test --- tests/queries/0_stateless/00632_get_sample_block_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 6a226c4912a..a631cbb8b86 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -57,7 +57,7 @@ DROP TABLE video_views; -- Test for tsan: Ensure cache is used from one thread -SET max_threads = 32; +SET max_threads = 32, max_memory_usage = '10G'; DROP TABLE IF EXISTS sample_00632; From 458509909da0e6702dbb6023775112e4fa1c9a83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 
2024 23:21:14 +0200 Subject: [PATCH 079/363] Update test --- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index a7c6c099de6..bb5a21d0779 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 33ed33d2af0821785165f0ffe80e3c8086c637e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 080/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From af324f69e955310f54e451d1120e8526ac3150fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:56:20 +0200 Subject: [PATCH 081/363] Update test --- tests/queries/1_stateful/00158_cache_dictionary_has.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 32c109417de..631a7751550 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -10,6 +10,8 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PA LIFETIME(MIN 300 MAX 600) LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +SET timeout_before_checking_execution_speed = 300; + SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; @@ -20,4 +22,4 @@ SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) a SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; -DROP DATABASE IF EXISTS db_dict; +DROP DATABASE IF EXISTS db_dict; From 461251b519120dd6b0cb64471bfba70160137e50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 19:21:47 +0200 Subject: [PATCH 082/363] Update a test --- .../02450_kill_distributed_query_deadlock.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git 
a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 445f907bcc5..96692ba325a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -5,20 +5,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Test that running distributed query and cancel it ASAP, -# this can trigger a hung/deadlock in ProcessorList. -for i in {1..50}; do +# Test that runs a distributed query and cancels it ASAP, +# this has a chance to trigger a hung/deadlock in ProcessorList. +for i in {1..50} +do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & - while :; do + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 --max_bytes_to_read 0 --max_result_rows 0 --max_result_bytes 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + while true + do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" - if [[ "$killed_queries" -ge 1 ]]; then + if [[ "$killed_queries" -ge 1 ]] + then break fi done wait -n query_return_status=$? - if [[ $query_return_status -eq 0 ]]; then + if [[ $query_return_status -eq 0 ]] + then echo "Query $query_id should be cancelled, however it returns successfully" fi done From af30e72e187713f4e435ff4f1f2382ea6671ba21 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Aug 2024 23:58:57 +0200 Subject: [PATCH 083/363] Better limits --- tests/config/users.d/limits.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 46cff73142c..53cbbfa744a 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -26,6 +26,7 @@ profiles: max_execution_time_leaf: 600 max_execution_speed: 100G max_execution_speed_bytes: 10T + timeout_before_checking_execution_speed: 300 max_estimated_execution_time: 600 max_columns_to_read: 20K max_temporary_columns: 20K From fb23cbdef6b38d1ba90aa1bb304487a3bb947e5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 084/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 15e9f8d9cbe4c2f0a1d9973650d93fa195a56276 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 22:52:01 +0200 Subject: [PATCH 085/363] Fix `02481_async_insert_race_long` --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index b0088017d32..def97409bc4 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -29,11 +29,8 @@ function insert3() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & - sleep 0.05 + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" done - - wait } function select1() From 13b435d281ce39a4f8eee889da482dbf272dd1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 086/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 113f7e0c8c9cc61ab72f5ccd8bb6e5fac58cbaea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 19:37:38 +0200 Subject: [PATCH 087/363] Maybe better --- tests/queries/1_stateful/00157_cache_dictionary.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index bb5a21d0779..f1bee538828 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,8 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 + SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 1b251fe08837aee56fe541bf3ef54ff647650869 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:10:31 +0000 Subject: [PATCH 088/363] Allow to specify min and max for random settings in the test --- docs/en/development/tests.md | 22 +++++ tests/clickhouse-test | 92 ++++++++++++++++--- ...mic_read_subcolumns_compact_merge_tree.sql | 1 + ...ynamic_read_subcolumns_wide_merge_tree.sql | 1 + ...merges_1_horizontal_compact_merge_tree.sql | 4 + ..._merges_1_horizontal_compact_wide_tree.sql | 2 + ...c_merges_1_vertical_compact_merge_tree.sql | 2 + ...amic_merges_1_vertical_wide_merge_tree.sql | 2 + ...merges_2_horizontal_compact_merge_tree.sql | 1 + ...ic_merges_2_horizontal_wide_merge_tree.sql | 1 + 
 ...c_merges_2_vertical_compact_merge_tree.sql |  1 +
 ...amic_merges_2_vertical_wide_merge_tree.sql |  1 +
 ...sted_dynamic_merges_compact_horizontal.sql |  1 +
 ...nested_dynamic_merges_compact_vertical.sql |  1 +
 ..._nested_dynamic_merges_wide_horizontal.sql |  1 +
 ...38_nested_dynamic_merges_wide_vertical.sql |  1 +
 16 files changed, 122 insertions(+), 12 deletions(-)

diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 269995a1a96..f0afa983fec 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -91,6 +91,28 @@ SELECT 1
 In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
 For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
 
+### Specifying limits for random settings
+
+A test can specify minimum and maximum allowed values for settings that can be randomized during a test run.
+
+For `.sh` tests, limits are written as a comment on the line after the tags, or on the second line if no tags are specified:
+
+```bash
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+```
+
+For `.sql` tests, limits are written as a SQL comment on the line after the tags, or on the first line if no tags are specified:
+
+```sql
+-- Tags: no-fasttest
+-- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+SELECT 1
+```
+
+If you need to specify only one limit, you can use `None` for the other one.
+
 ### Choosing the Test Name
 
 The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
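As a quick, self-contained illustration of the documented format, here is a minimal Python sketch of the parse-and-clamp idea that the clickhouse-test patches below implement. The helper names `parse_limits` and `clamp` are illustrative, not the actual functions added by the patches, and the sketch uses the `;` separator that a later patch in this series ("Avoid regexp") settles on, because a naive split on `,` would break inside the `(min, max)` tuples.

```python
from ast import literal_eval


def parse_limits(line: str, comment_sign: str = "--") -> dict:
    """Parse a '-- Random settings limits: a=(1, 100); b=(None, 5)' comment line."""
    prefix = "Random settings limits:"
    body = line[len(comment_sign):].lstrip()
    if not body.startswith(prefix):
        return {}
    limits = {}
    for pair in body[len(prefix):].split(";"):
        name, bounds = pair.split("=")
        limits[name.strip()] = literal_eval(bounds.strip())  # e.g. (100, None)
    return limits


def clamp(random_settings: dict, limits: dict) -> None:
    """Pull each randomized value back into its [min, max] range; None means unbounded."""
    for name, value in random_settings.items():
        if name in limits:
            lo, hi = limits[name]
            if lo is not None and value < lo:
                random_settings[name] = lo
            if hi is not None and value > hi:
                random_settings[name] = hi


limits = parse_limits("-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None)")
settings = {"max_block_size": 128, "index_granularity": 42}
clamp(settings, limits)
print(settings)  # {'max_block_size': 1000, 'index_granularity': 100}
```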
diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a29c786e998..ea488e7c3dd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -39,6 +39,7 @@ from errno import ESRCH from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union +from ast import literal_eval as make_tuple try: import termcolor # type: ignore @@ -1068,9 +1069,25 @@ class TestCase: return description + "\n" + def apply_random_settings_limits(self, random_settings): + print("Random settings limits:", self.random_settings_limits) + for setting in random_settings: + if setting in self.random_settings_limits: + min = self.random_settings_limits[setting][0] + if min and random_settings[setting] < min: + random_settings[setting] = min + max = self.random_settings_limits[setting][1] + if max and random_settings[setting] > max: + random_settings[setting] = max + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() + self.random_settings_limits = ( + suite.all_random_settings_limits[case] + if case in suite.all_random_settings_limits + else dict() + ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): self.tags.add(tag.strip()) @@ -1112,11 +1129,13 @@ class TestCase: if self.randomize_settings: self.random_settings = SettingsRandomizer.get_random_settings(args) + self.apply_random_settings_limits(self.random_settings) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( MergeTreeSettingsRandomizer.get_random_settings(args) ) + self.apply_random_settings_limits(self.merge_tree_random_settings) self.base_url_params = ( os.environ["CLICKHOUSE_URL_PARAMS"] @@ -1900,7 +1919,9 @@ class TestSuite: return test_name @staticmethod - def read_test_tags(suite_dir: str, all_tests: List[str]) -> Dict[str, Set[str]]: + def read_test_tags_and_random_settings_limits( + suite_dir: str, all_tests: List[str] + ) -> (Dict[str, Set[str]], Dict[str, Dict[str, Tuple[int, int]]]): def get_comment_sign(filename): if filename.endswith(".sql") or filename.endswith(".sql.j2"): return "--" @@ -1925,22 +1946,48 @@ class TestSuite: tags = {tag.strip() for tag in tags} return tags + def parse_random_settings_limits_from_line( + line, comment_sign + ) -> Dict[str, Tuple[int, int]]: + if not line.startswith(comment_sign): + return {} + random_settings_limits_str = line[ + len(comment_sign) : + ].lstrip() # noqa: ignore E203 + random_settings_limits_prefix = "Random settings limits:" + if not random_settings_limits_str.startswith(random_settings_limits_prefix): + return {} + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # noqa: ignore E203 + # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' 
+ random_settings_limits = re.findall( + "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str + ) + random_settings_limits = { + pair[0]: make_tuple(pair[1]) for pair in random_settings_limits + } + return random_settings_limits + def is_shebang(line: str) -> bool: return line.startswith("#!") def find_tag_line(file): - for line in file: + line = file.readline() + while line != "": line = line.strip() if line and not is_shebang(line): return line + line = file.readline() return "" - def load_tags_from_file(filepath): + def load_tags_and_random_settings_limits_from_file(filepath): comment_sign = get_comment_sign(filepath) need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: tag_line = find_tag_line(file) + next_line = file.readline() except UnicodeDecodeError: return [] try: @@ -1950,21 +1997,35 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - return parsed_tags + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) + return parsed_tags, random_settings_limits all_tags = {} + all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags = load_tags_from_file(os.path.join(suite_dir, test_name)) + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) if tags: all_tags[test_name] = tags + if random_settings_limits: + all_random_settings_limits[test_name] = random_settings_limits elapsed = (datetime.now() - start_time).total_seconds() if elapsed > 1: - print(f"Tags for suite {suite_dir} read in {elapsed:.2f} seconds") - return all_tags + print( + f"Tags and random settings limits for suite {suite_dir} read in {elapsed:.2f} seconds" + ) + return all_tags, all_random_settings_limits def __init__(self, args, suite_path: str, suite_tmp_path: str, suite: str): self.args = args @@ -1994,9 +2055,16 @@ class TestSuite: self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func ) - self.all_tags: Dict[str, Set[str]] = self.read_test_tags( - self.suite_path, self.all_tests + + all_tags_and_random_settings_limits = ( + self.read_test_tags_and_random_settings_limits( + self.suite_path, self.all_tests + ) ) + self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql index ddfba4418bd..822393d3c78 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql 
b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..2394893dc8b 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index d2c787040e5..7c2e7c3d2be 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; @@ -31,3 +33,5 @@ optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); drop table test; + +select 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index f99bf771608..aa62435188a 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index be81596d043..bfc7bb9d206 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index f6396af42a8..233667db0a7 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index e133ac3001f..48a6a55378c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, 
no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index d527081b763..44b298b1c35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index ebccfb77922..f42150720b3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index 104d6018e41..ee4ff6af162 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 1d5c63dcdf1..e0636f053df 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index 2bffe35c577..edfad295e9a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index fb686091ebb..79d488ec253 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set 
allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index ed195452d56..e2a453b867a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From d3dc17453377368defd80cda9f4b95dda6adc9df Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:15:11 +0000 Subject: [PATCH 089/363] Remove log --- tests/clickhouse-test | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 389193836bf..5fcb9fb80f1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,7 +682,6 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" - DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1071,7 +1070,6 @@ class TestCase: return description + "\n" def apply_random_settings_limits(self, random_settings): - print("Random settings limits:", self.random_settings_limits) for setting in random_settings: if setting in self.random_settings_limits: min = self.random_settings_limits[setting][0] @@ -1211,9 +1209,6 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif ( tags and ("atomic-database" in tags) @@ -1251,11 +1246,6 @@ class TestCase: ): return FailureReason.SKIP - elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) - ): - return FailureReason.SKIP - elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2295,6 +2285,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) + break except TimeoutError: break finally: @@ -3280,12 +3271,6 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) - parser.add_argument( - "--distributed-cache", - action="store_true", - default=False, - help="Run tests with enabled distributed cache", - ) parser.add_argument( "--azure-blob-storage", action="store_true", From 18a7a82458ce7ec3f12f7b6751699f119769ed55 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:16:18 +0000 Subject: [PATCH 090/363] Better formatting --- tests/clickhouse-test | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5fcb9fb80f1..bcb8a12625b 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1950,15 +1950,11 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[ - len(comment_sign) : - ].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 random_settings_limits_prefix = 
"Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[ - len(random_settings_limits_prefix) : - ] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' random_settings_limits = re.findall( "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str @@ -1996,25 +1992,20 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line - random_settings_limits = parse_random_settings_limits_from_line( - random_settings_limits_line, comment_sign - ) + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) return parsed_tags, random_settings_limits all_tags = {} all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - ( - tags, - random_settings_limits, - ) = load_tags_and_random_settings_limits_from_file( - os.path.join(suite_dir, test_name) - ) + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,9 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 74a2976810b86086819ee8e6ee1f110ab1e70a37 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:13:03 +0000 Subject: [PATCH 091/363] Fix pylint --- tests/clickhouse-test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bcb8a12625b..84f33860484 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1072,12 +1072,12 @@ class TestCase: def apply_random_settings_limits(self, random_settings): for setting in random_settings: if setting in self.random_settings_limits: - min = self.random_settings_limits[setting][0] - if min and random_settings[setting] < min: - random_settings[setting] = min - max = self.random_settings_limits[setting][1] - if max and random_settings[setting] > max: - random_settings[setting] = max + min_value = self.random_settings_limits[setting][0] + if min_value and random_settings[setting] < min_value: + random_settings[setting] = min_value + max_value = self.random_settings_limits[setting][1] + if max_value and random_settings[setting] > max_value: + random_settings[setting] = max_value def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name @@ -2005,7 +2005,7 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = 
load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2052,7 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 5226792b1d8b4e110c63a813fb68c9dd65ea07b7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:48:06 +0000 Subject: [PATCH 092/363] Fix bad merge with master --- tests/clickhouse-test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 84f33860484..c4124982442 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1163,6 +1163,9 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB + elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1246,6 +1249,11 @@ class TestCase: ): return FailureReason.SKIP + elif "no-flaky-check" in tags and ( + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + ): + return FailureReason.SKIP + elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2274,7 +2282,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) - break except TimeoutError: break finally: @@ -3260,6 +3267,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--distributed-cache", + action="store_true", + default=False, + help="Run tests with enabled distributed cache", + ) parser.add_argument( "--azure-blob-storage", action="store_true", From bb33dca38470aba044da06938cc96ca55166262d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:49:08 +0000 Subject: [PATCH 093/363] Fix unrelated changes --- tests/clickhouse-test | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c4124982442..72136404796 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,6 +682,7 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" + DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1163,9 +1164,6 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1212,6 +1210,9 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB + elif tags 
and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif ( tags and ("atomic-database" in tags) @@ -1250,7 +1251,7 @@ class TestCase: return FailureReason.SKIP elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) ): return FailureReason.SKIP From 71c06b40cbf65abda49579bf5ac08e46575c7d29 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 09:07:21 +0000 Subject: [PATCH 094/363] Avoid regexp --- docs/en/development/tests.md | 4 ++-- tests/clickhouse-test | 14 +++++++------- ...amic_merges_1_horizontal_compact_merge_tree.sql | 2 +- ...namic_merges_1_horizontal_compact_wide_tree.sql | 2 +- ...ynamic_merges_1_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_1_vertical_wide_merge_tree.sql | 2 +- ...amic_merges_2_horizontal_compact_merge_tree.sql | 2 +- ...dynamic_merges_2_horizontal_wide_merge_tree.sql | 2 +- ...ynamic_merges_2_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_2_vertical_wide_merge_tree.sql | 2 +- ...38_nested_dynamic_merges_compact_horizontal.sql | 2 +- ...3038_nested_dynamic_merges_compact_vertical.sql | 2 +- ...03038_nested_dynamic_merges_wide_horizontal.sql | 2 +- .../03038_nested_dynamic_merges_wide_vertical.sql | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index f0afa983fec..bc9f85ef323 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -100,14 +100,14 @@ For `.sh` tests limits are written as a comment on the line next to tags or on t ```bash #!/usr/bin/env bash # Tags: no-fasttest -# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +# Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) ``` For `.sql` tests tags are placed as a SQL comment in the line next to tags or in the first line: ```sql -- Tags: no-fasttest --- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) SELECT 1 ``` diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 72136404796..e5378e8c7f3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1964,13 +1964,13 @@ class TestSuite: if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 - # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' - random_settings_limits = re.findall( - "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str - ) - random_settings_limits = { - pair[0]: make_tuple(pair[1]) for pair in random_settings_limits - } + # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' 
+ random_settings_limits = {} + for setting_and_limit in random_settings_limits_str.split(';'): + setting_and_limit = setting_and_limit.split('=') + random_settings_limits[setting_and_limit[0].strip()] = make_tuple( + setting_and_limit[1] + ) return random_settings_limits def is_shebang(line: str) -> bool: diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index 7c2e7c3d2be..46f1c78b255 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index aa62435188a..bf0c6ef0374 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index bfc7bb9d206..fb82369a7a3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index 233667db0a7..c026bc04a56 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index 71c6841515a..7f1934091f2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), 
merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 94ae1d867f5..f1f387fae9d 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index 98ae230636a..cc11c454d38 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index f8f5bd5d9e1..ffb2aca8b35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 13c1fd8b485..9ec4e4f949b 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index daa95071cdb..ed4de931841 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set 
use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index dea7e7c0971..bd3c4b58a8f 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index bf1323f2ea9..81bcda5443d 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From 621f4bbf9e04b62628a9c053b3f39c6b8a67a52d Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:13:20 +0200 Subject: [PATCH 095/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index f815a21b6a1..bb062deaab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,6 +79,7 @@ static std::initializer_list Date: Tue, 6 Aug 2024 15:01:10 +0200 Subject: [PATCH 096/363] Fix pylint --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e5378e8c7f3..dea303ecdfb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1086,7 +1086,7 @@ class TestCase: self.random_settings_limits = ( suite.all_random_settings_limits[case] if case in suite.all_random_settings_limits - else dict() + else {} ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): From 3ebc3852f404a1ce392e9b66b8356fa9da701097 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 22 Jul 2024 15:28:29 +0000 Subject: [PATCH 097/363] Allow filtering ip addresses by ip family in DNS resolver --- programs/server/Server.cpp | 3 + src/Common/DNSResolver.cpp | 93 ++++++-- src/Common/DNSResolver.h | 9 + src/Core/ServerSettings.h | 2 + src/Core/SettingsChangesHistory.cpp | 261 +++++++++++++++++++++++ tests/integration/test_dns_cache/test.py | 67 +++++- 6 files changed, 418 insertions(+), 17 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7800ee9ff00..3126f65ef09 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1768,6 +1768,9 @@ try new_server_settings.http_connections_store_limit, }); + DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); + + if (global_context->isServerCompletelyStarted()) 
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 4b577a251af..051e6e63091 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "DNSPTRResolverProvider.h" @@ -139,12 +141,6 @@ DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) -{ - auto [result, _ ] = cache.getOrSet(host, [&host]() {return std::make_shared(resolveIPAddressImpl(host), std::chrono::system_clock::now());}); - return result->addresses; -} std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -198,21 +194,90 @@ struct DNSResolver::Impl std::atomic disable_cache{false}; }; +struct DNSResolver::AddressFilter +{ + struct DNSFilterSettings + { + std::atomic dns_allow_resolve_names_to_ipv4{true}; + std::atomic dns_allow_resolve_names_to_ipv6{true}; + }; -DNSResolver::DNSResolver() : impl(std::make_unique()), log(getLogger("DNSResolver")) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) + { + bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + + if (dns_resolve_ipv4 && dns_resolve_ipv6) + { + return; + } + if (!dns_resolve_ipv4 && !dns_resolve_ipv6) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv4 and dns_allow_resolve_names_to_ipv6 are both disabled"); + } + addresses.erase( + std::remove_if(addresses.begin(), addresses.end(), + [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }), + addresses.end() + ); + } + + void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + { + settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; + settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + } + + DNSFilterSettings settings; +}; + + +DNSResolver::DNSResolver() + : impl(std::make_unique()) + , addressFilter(std::make_unique()) + , log(getLogger("DNSResolver")) {} + + +DNSResolver::IPAddresses DNSResolver::getResolvedIPAdressessWithFiltering(const std::string & host) +{ + auto addresses = resolveIPAddressImpl(host); + addressFilter->performAddressFiltering(addresses); + + if (addresses.empty()) + { + ProfileEvents::increment(ProfileEvents::DNSError); + throw DB::NetException(ErrorCodes::DNS_ERROR, "After filtering, there are no resolved addresses for host ({}).", host); + } + return addresses; +} + +DNSResolver::IPAddresses DNSResolver::resolveIPAddressWithCache(const std::string & host) +{ + auto [result, _ ] = impl->cache_host.getOrSet(host, [&host, this]() {return std::make_shared(getResolvedIPAdressessWithFiltering(host), std::chrono::system_clock::now());}); + return result->addresses; +} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool
dns_allow_resolve_names_to_ipv6_) +{ + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); +} + DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) - return resolveIPAddressImpl(host); + return getResolvedIPAdressessWithFiltering(host); addToNewHosts(host); - return resolveIPAddressWithCache(impl->cache_host, host); + return resolveIPAddressWithCache(host); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -232,7 +297,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -241,7 +306,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -254,7 +319,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); + std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -419,8 +484,8 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); - auto new_value = resolveIPAddressImpl(host); + const auto old_value = resolveIPAddressWithCache(host); + auto new_value = getResolvedIPAdressessWithFiltering(host); const bool result = old_value != new_value; impl->cache_host.set(host, std::make_shared(std::move(new_value), std::chrono::system_clock::now())); return result; diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 1ddd9d3b991..b35f55dfcd2 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -68,6 +68,8 @@ public: /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); + void setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6); + /// Returns a copy of cache entries std::vector> cacheEntries() const; @@ -86,6 +88,10 @@ private: struct Impl; std::unique_ptr impl; + + struct AddressFilter; + std::unique_ptr addressFilter; + LoggerPtr log; /// Updates cached value and returns true it has been changed. 
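For reference, the filter that setFilterSettings configures reduces to a small pure function over the resolved address list. Below is a minimal standalone sketch of that rule, assuming only Poco and C++20 are available; filterByFamily and the main driver are illustrative names, not part of the patch:

#include <iostream>
#include <vector>
#include <Poco/Net/IPAddress.h>

// Keep only the addresses whose family is still allowed. This mirrors the
// behaviour of AddressFilter::performAddressFiltering in the patch above.
std::vector<Poco::Net::IPAddress> filterByFamily(
    std::vector<Poco::Net::IPAddress> addresses, bool allow_ipv4, bool allow_ipv6)
{
    std::erase_if(addresses, [=](const Poco::Net::IPAddress & address)
    {
        return (address.family() == Poco::Net::IPAddress::IPv4 && !allow_ipv4)
            || (address.family() == Poco::Net::IPAddress::IPv6 && !allow_ipv6);
    });
    return addresses;
}

int main()
{
    std::vector<Poco::Net::IPAddress> addresses
        = {Poco::Net::IPAddress("127.0.0.1"), Poco::Net::IPAddress("::1")};
    for (const auto & address : filterByFamily(addresses, /*allow_ipv4=*/ true, /*allow_ipv6=*/ false))
        std::cout << address.toString() << '\n'; // prints only 127.0.0.1
}

The both-families-disabled case is intentionally left to the caller in this sketch; in the patch it throws BAD_ARGUMENTS before any filtering happens.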
@@ -94,6 +100,9 @@ private: void addToNewHosts(const String & host); void addToNewAddresses(const Poco::Net::IPAddress & address); + + IPAddresses resolveIPAddressWithCache(const std::string & host); + IPAddresses getResolvedIPAdressessWithFiltering(const std::string & host); }; } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..6c23e3b95f6 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -106,6 +106,8 @@ namespace DB M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolving names to IPv4 addresses.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolving names to IPv6 addresses.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0ccbd874a3d..a76e214b1fc 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,6 +500,267 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression
level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with 
count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + 
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. 
It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index a6db26c8575..5e120dc42aa 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,6 +32,7 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", + ipv4_address="10.5.95.11", ) @@ -39,9 +40,6 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() - - _fill_nodes([node1, node2], "test_table_drop") - yield cluster except Exception 
as ex: @@ -59,6 +57,8 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. + _fill_nodes([node1, node2], "test_table_drop") + node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,6 +98,67 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + <clickhouse> + <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> + <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> + </clickhouse> + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + host_ipv6 = node2.ipv6_address + host_ipv4 = node2.ipv4_address + + node2.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node2.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") + node2.query("SYSTEM RELOAD CONFIG") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node2.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node2.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node2.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node2.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node2.query("SYSTEM RELOAD CONFIG") + + node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], From 0d5cb9f75a527e90aed18860efbd5ed1f9dcd775 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 23 Jul 2024 09:25:02 +0000 Subject: [PATCH 098/363] Review fixes --- src/Common/DNSResolver.cpp | 34 ++++++++++++++--------------- src/Core/SettingsChangesHistory.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 051e6e63091..08111d7f2af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include "Common/MultiVersion.h" #include #include "DNSPTRResolverProvider.h" @@ -198,14 +197,17 @@ struct DNSResolver::AddressFilter { struct DNSFilterSettings { - std::atomic dns_allow_resolve_names_to_ipv4{true}; - std::atomic dns_allow_resolve_names_to_ipv6{true}; + bool dns_allow_resolve_names_to_ipv4{true}; + bool dns_allow_resolve_names_to_ipv6{true}; }; + AddressFilter() : settings(std::make_unique()) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) { - bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; - bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + const auto current_settings = settings.get(); + bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = current_settings->dns_allow_resolve_names_to_ipv6; if (dns_resolve_ipv4 && dns_resolve_ipv6) {
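This hunk (completed by the next one) is about snapshot consistency, not just style: with two independent std::atomic flags, a reader could observe the IPv4 flag from one setSettings call and the IPv6 flag from another. Publishing one immutable DNSFilterSettings object fixes that. A minimal sketch of the same pattern using only the standard library follows; SnapshotSettings is an illustrative stand-in for MultiVersion, not the actual implementation:

#include <memory>

struct DNSFilterSettings
{
    bool dns_allow_resolve_names_to_ipv4{true};
    bool dns_allow_resolve_names_to_ipv6{true};
};

// Writers publish a complete new settings object; readers take one shared_ptr
// and therefore always see both flags from the same update.
class SnapshotSettings
{
public:
    void set(DNSFilterSettings settings)
    {
        std::atomic_store(&current, std::make_shared<const DNSFilterSettings>(settings));
    }
    std::shared_ptr<const DNSFilterSettings> get() const
    {
        return std::atomic_load(&current);
    }
private:
    std::shared_ptr<const DNSFilterSettings> current = std::make_shared<const DNSFilterSettings>();
};

int main()
{
    SnapshotSettings settings;
    settings.set({/*ipv4*/ true, /*ipv6*/ false});
    auto snapshot = settings.get(); // both flags come from the same set() call
    return snapshot->dns_allow_resolve_names_to_ipv6 ? 1 : 0;
}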
@@ -215,24 +217,20 @@ struct DNSResolver::AddressFilter { throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv4 and dns_allow_resolve_names_to_ipv6 are both disabled"); } - addresses.erase( - std::remove_if(addresses.begin(), addresses.end(), - [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) - { - return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) - || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); - }), - addresses.end() - ); + + std::erase_if(addresses, [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }); } void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) { - settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; - settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); } - DNSFilterSettings settings; + MultiVersion settings; }; diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a76e214b1fc..01b9bca795f 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,7 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 12:58:50 +0000 Subject: [PATCH 099/363] Fix test --- src/Core/SettingsChangesHistory.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 01b9bca795f..ac427e2e03e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,9 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 16:56:29 +0000 Subject: [PATCH 100/363] Fix tidy build --- src/Common/DNSResolver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 08111d7f2af..bbee7d259f0 100644 ---
a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -225,9 +225,9 @@ struct DNSResolver::AddressFilter }); } - void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + void setSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6)); } MultiVersion settings; @@ -264,9 +264,9 @@ Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) return pickAddress(resolveHostAll(host)); // random order -> random pick } -void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6); } DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) From 4143eea587b808086cceb98025906646fa78c96a Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 08:35:38 +0000 Subject: [PATCH 102/363] Add test to skip parallel --- tests/integration/parallel_skip.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..6689572aeb7 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -1,6 +1,7 @@ [ "test_dns_cache/test.py::test_dns_cache_update", "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_dns_resolver_filter", "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", From 012ea3cc6d09c50f29fe4d7964aa18ee038c35b2 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 13:29:36 +0000 Subject: [PATCH 103/363] Rebase --- src/Core/SettingsChangesHistory.cpp | 259 ----------------------- tests/integration/test_dns_cache/test.py | 138 ++++++------ 2 files changed, 74 insertions(+), 323 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ac427e2e03e..0ccbd874a3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,265 +500,6 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 5e120dc42aa..36401517429 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,7 +32,6 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", - ipv4_address="10.5.95.11", ) @@ -40,6 +39,9 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() + + _fill_nodes([node1, node2], "test_table_drop") + yield cluster except Exception as ex: @@ -57,8 +59,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. 
- _fill_nodes([node1, node2], "test_table_drop") - node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,67 +98,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") -def _render_filter_config(allow_ipv4, allow_ipv6): - config = f""" - <clickhouse> - <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> - <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> - </clickhouse> - """ - return config - - -@pytest.mark.parametrize( - "allow_ipv4, allow_ipv6", - [ - (True, False), - (False, True), - (False, False), - ], -) -def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): - host_ipv6 = node2.ipv6_address - host_ipv4 = node2.ipv4_address - - node2.set_hosts( - [ - (host_ipv6, "test_host"), - (host_ipv4, "test_host"), - ] - ) - node2.replace_config( - "/etc/clickhouse-server/config.d/dns_filter.xml", - _render_filter_config(allow_ipv4, allow_ipv6), - ) - - node2.query("SYSTEM DROP DNS CACHE") - node2.query("SYSTEM DROP CONNECTIONS CACHE") - node2.query("SYSTEM RELOAD CONFIG") - - if not allow_ipv4 and not allow_ipv6: - with pytest.raises(QueryRuntimeException): - node2.query("SELECT * FROM remote('lost_host', 'system', 'one')") - else: - node2.query("SELECT * FROM remote('test_host', system, one)") - assert ( - node2.query( - "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" - ) - == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" - ) - - node2.exec_in_container( - [ - "bash", - "-c", - "rm /etc/clickhouse-server/config.d/dns_filter.xml", - ], - privileged=True, - user="root", - ) - node2.query("SYSTEM RELOAD CONFIG") - - node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], @@ -378,3 +317,74 @@ def test_host_is_drop_from_cache_after_consecutive_failures( assert node4.wait_for_log_line( "Cached hosts dropped:.*InvalidHostThatDoesNotExist.*" ) + + +node7 = cluster.add_instance( + "node7", + main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], + with_zookeeper=True, + ipv6_address="2001:3984:3989::1:1117", + ipv4_address="10.5.95.17", +) + + +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + <clickhouse> + <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> + <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> + </clickhouse> + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + node = node7 + host_ipv6 = node.ipv6_address + host_ipv4 = node.ipv4_address + + node.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node.query("SYSTEM RELOAD CONFIG") + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node.query("SYSTEM RELOAD CONFIG") From d124de847b44344d9346c4d1b76ada03b31c58c8 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug
2024 16:06:59 +0000 Subject: [PATCH 104/363] Fix style --- tests/clickhouse-test | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index dea303ecdfb..c3b1d4d907c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1959,15 +1959,17 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() random_settings_limits_prefix = "Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' random_settings_limits = {} - for setting_and_limit in random_settings_limits_str.split(';'): - setting_and_limit = setting_and_limit.split('=') + for setting_and_limit in random_settings_limits_str.split(";"): + setting_and_limit = setting_and_limit.split("=") random_settings_limits[setting_and_limit[0].strip()] = make_tuple( setting_and_limit[1] ) @@ -2014,7 +2016,12 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,7 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] From 0ebe8e35511f764b61cb2428433132644f7deb96 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:38:23 +0200 Subject: [PATCH 105/363] Fix style --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e6949dd4fba..1ebc9b07748 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,7 +79,7 @@ static std::initializer_list Date: Wed, 7 Aug 2024 14:42:42 +0200 Subject: [PATCH 106/363] Fix style check --- tests/clickhouse-test | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c3b1d4d907c..5946e561949 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2068,10 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] - + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] 
for test_name in self.all_tests: From 725640613b0d1cf47515697b5856a85953b73483 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Aug 2024 22:35:57 +0200 Subject: [PATCH 107/363] Add annotations --- .../0_stateless/00111_shard_external_sort_distributed.sql | 3 ++- .../0_stateless/00376_shard_group_uniq_array_of_int_array.sql | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 93efc317bfa..9e06654195d 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,4 +1,5 @@ --- Tags: distributed, long +-- Tags: distributed, long, no-flaky-check +-- ^ no-flaky-check - sometimes longer than 600s with ThreadFuzzer. SET max_memory_usage = 150000000; SET max_bytes_before_external_sort = 10000000; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 24b7f1c30a6..4453c26283c 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: long SET max_rows_to_read = '55M'; From da6378752fc562f7b8df487f86fe2257215eb96a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:07:43 +0200 Subject: [PATCH 108/363] Add annotations --- tests/queries/0_stateless/01304_direct_io_long.sh | 3 ++- tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 6ab25eebaf7..867c37667fe 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long, no-object-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build, no-flaky-check +# It can be too long with ThreadFuzzer CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 index 47940356302..7df77595347 100644 --- a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 +++ b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-flaky-check +-- It can be too long with ThreadFuzzer DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; From 4fac40a3cb4823f0014a2c5324593b6ef8a6b6ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:41:22 +0200 Subject: [PATCH 109/363] Step back --- programs/client/Client.cpp | 10 ++++++++++ programs/local/LocalServer.cpp | 3 +++ src/Client/ClientBase.h | 3 +++ src/Client/LocalConnection.cpp | 1 + src/Client/LocalConnection.h | 2 ++ src/Client/Suggest.cpp | 4 ---- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 0f136664de8..1d99d223ee9 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -327,6 +328,7 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; + auto & thread_status = 
MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -341,6 +343,14 @@ try initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); + { + // All that just to set DB::CurrentThread::get().getGlobalContext() + // which is required for client timezone (pushed from server) to work. + auto thread_group = std::make_shared(); + const_cast(thread_group->global_context) = global_context; + thread_status.attachToGroup(thread_group, false); + } + /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb89c6c5510..0d731ed0e14 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -29,8 +29,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -461,6 +463,7 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; + thread_status.emplace(); StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 26deb1eda26..1a23b6b1363 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -296,6 +296,9 @@ protected: Settings cmd_settings; MergeTreeSettings cmd_merge_tree_settings; + /// thread status should be destructed before shared context because it relies on process list. + std::optional thread_status; + ServerConnectionPtr connection; ConnectionParameters connection_parameters; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index b0a5ef99253..072184e0a66 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -128,6 +128,7 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; + state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 5cc3d0b30ec..b424c5b5aa3 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -61,6 +61,8 @@ struct LocalQueryState /// Time after the last check to stop the request and send the progress. 
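The thread-group attachment added in Client.cpp above exists only to populate thread-local state before the server pushes a timezone. A toy Python model of that dependency, not the real ThreadStatus API; `apply_server_timezone` and the dict-based context are invented for illustration:

```python
import threading

current_thread = threading.local()  # toy stand-in for DB::CurrentThread

def attach_to_group(global_context: dict) -> None:
    # What the new block in Client::main arranges: make the global context
    # reachable from the thread's thread-local state.
    current_thread.global_context = global_context

def apply_server_timezone(tz: str) -> None:
    # Server-pushed timezone handling needs the thread-local context set first.
    ctx = getattr(current_thread, "global_context", None)
    if ctx is None:
        raise RuntimeError("thread is not attached to a thread group")
    ctx["session_timezone"] = tz

ctx: dict = {}
attach_to_group(ctx)
apply_server_timezone("UTC")
assert ctx["session_timezone"] == "UTC"
```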
Stopwatch after_send_progress; Stopwatch after_send_profile_events; + + std::unique_ptr query_scope_holder; }; diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index c1f163939e8..0188ebc8173 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -96,10 +96,6 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { ThreadStatus thread_status; - my_context->makeQueryContext(); - auto group = ThreadGroup::createForQuery(my_context); - CurrentThread::attachToGroup(group); - for (size_t retry = 0; retry < 10; ++retry) { try From ca9bd647fbd97fd36de5e30778c824ae522e03c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:52:18 +0200 Subject: [PATCH 110/363] Simplification --- src/Client/ClientBase.cpp | 43 +++++++-------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 62a008bc88c..a305278fb4d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1877,48 +1877,19 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin profile_events.watch.restart(); { - /// Temporarily apply query settings to context. - std::optional old_settings; - SCOPE_EXIT_SAFE({ - if (old_settings) - client_context->setSettings(*old_settings); + /// Temporarily apply query settings to the context. + Settings old_settings = client_context->getSettingsCopy(); + SCOPE_EXIT_SAFE( + { + client_context->setSettings(old_settings); }); - - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(client_context->getSettingsRef()); - client_context->applySettingsChanges(settings_ast.as()->changes); - client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. 
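The simplification below drops the per-query-type dispatch in favour of `InterpreterSetQuery::applySettingsFromQuery`, but keeps the copy/apply/restore shape around it. A Python sketch of that pattern, with a context manager standing in for `SCOPE_EXIT_SAFE` and made-up setting names:

```python
from contextlib import contextmanager

@contextmanager
def temporarily_applied(settings: dict, changes: dict):
    # Copy the whole settings object up front, apply the query-level changes,
    # and restore the copy on any exit path (what SCOPE_EXIT_SAFE guarantees).
    old = dict(settings)
    settings.update(changes)
    try:
        yield settings
    finally:
        settings.clear()
        settings.update(old)

client_settings = {"max_threads": 8}
with temporarily_applied(client_settings, {"max_threads": 1}):
    assert client_settings["max_threads"] == 1
assert client_settings == {"max_threads": 8}
```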
- const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From d52d599af4db1c779551b6788a5505e521fe3c31 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 18:34:02 +0200 Subject: [PATCH 111/363] Annotations --- ...ter_skip_virtual_columns_with_non_deterministic_functions.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 6ef8c5a8656..6714a069246 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +-- Tags: long SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); From f2731841de804c30ece1c75e84c8ca8d3eb62ef8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:20:21 +0200 Subject: [PATCH 112/363] init --- src/Core/callOnTypeIndex.h | 3 + src/DataTypes/getLeastSupertype.cpp | 41 +++++ src/DataTypes/getLeastSupertype.h | 22 +++ src/Functions/FunctionsConversion.cpp | 61 +++++++- ...23_interval_data_type_comparison.reference | 99 ++++++++++++ .../03223_interval_data_type_comparison.sql | 142 ++++++++++++++++++ 6 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.reference create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.sql diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index f5f67df563b..ae5afce36be 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -212,6 +213,8 @@ static bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
ar case TypeIndex::IPv4: return f(TypePair(), std::forward(args)...); case TypeIndex::IPv6: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + default: break; } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a71b19d6c92..0b9c744c091 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -228,6 +228,40 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type } } +DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) +{ + const auto& granularity_map = getGranularityMap(); + int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + DataTypePtr smallest_type; + + bool is_higher_interval = false; // For Years, Quarters and Months + + for (const auto &type : types) + { + if (const auto * interval_type = typeid_cast(type.get())) + { + int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); + if (current_granularity > 8) + is_higher_interval = true; + if (current_granularity < min_granularity) + { + min_granularity = current_granularity; + smallest_type = type; + } + } + } + + if (is_higher_interval && min_granularity <= 8) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + + if (smallest_type) + { + types_set.clear(); + types_set.insert(smallest_type->getTypeId()); + } + + return smallest_type; +} } template @@ -652,6 +686,13 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return numeric_type; } + /// For interval data types. + { + auto res = findSmallestIntervalSuperType(types, type_ids); + if (res) + return res; + } + /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases). 
return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 2ae1e52ca96..c584eb83011 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -1,5 +1,7 @@ #pragma once #include +#include +#include namespace DB { @@ -48,4 +50,24 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); +/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval +inline const std::unordered_map> & getGranularityMap() +{ + static std::unordered_map> granularity_map = + { + {IntervalKind::Kind::Nanosecond, {1, 1000}}, + {IntervalKind::Kind::Microsecond, {2, 1000}}, + {IntervalKind::Kind::Millisecond, {3, 1000}}, + {IntervalKind::Kind::Second, {4, 60}}, + {IntervalKind::Kind::Minute, {5, 60}}, + {IntervalKind::Kind::Hour, {6, 24}}, + {IntervalKind::Kind::Day, {7, 7}}, + {IntervalKind::Kind::Week, {8, 4}}, + {IntervalKind::Kind::Month, {9, 3}}, + {IntervalKind::Kind::Quarter, {10, 4}}, + {IntervalKind::Kind::Year, {11, 1}} + }; + return granularity_map; +} + } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0ab1858dc97 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,55 @@ struct ConvertImpl arguments, result_type, input_rows_count, additions); } } + else if constexpr (std::is_same_v && std::is_same_v) + { + IntervalKind to = typeid_cast(result_type.get())->getKind(); + IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); + + if (from == to) + return arguments[0].column; + + const auto &map = getGranularityMap(); + Int64 conversion_factor = 1; + Int64 result_value; + + int from_position = map.at(from).first; + int to_position = map.at(to).first; // Positions of each interval according to granurality map + + if (from_position < to_position) + { + for (int i = from_position - 1; i <= to_position; ++i) + { + // Find the kind that matches this position + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) / conversion_factor; + } + else + { + for (int i = from_position - 1; i >= to_position; --i) + { + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) * conversion_factor; + } + + return ColumnConst::create(ColumnInt64::create(1, result_value), input_rows_count); + } else { using FromFieldType = typename FromDataType::FieldType; @@ -2181,7 +2231,7 @@ private: const DataTypePtr from_type = removeNullable(arguments[0].type); ColumnPtr result_column; - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; if (context) date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; @@ -2277,7 +2327,7 @@ private: } } else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, from_string_tag); + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, 
from_string_tag); return true; }; @@ -2334,6 +2384,11 @@ private: else done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } + + if constexpr (std::is_same_v) + { + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); + } } if (!done) @@ -5224,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. - factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.reference b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference new file mode 100644 index 00000000000..e98f792e4b2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference @@ -0,0 +1,99 @@ +Comparing nanoseconds +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +Comparing microseconds +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +Comparing milliseconds +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +Comparing seconds +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +Comparing minutes +1 +1 +1 +1 +0 +0 +0 +0 +Comparing hours +1 +1 +1 +0 +0 +0 +Comparing days +1 +1 +0 +0 +Comparing weeks +1 +0 +Comparing months +1 +1 +1 +0 +0 +0 +Comparing quarters +1 +1 +0 +0 +Comparing years +1 +0 diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql new file mode 100644 index 00000000000..6e4862bf2d2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -0,0 +1,142 @@ +SELECT('Comparing nanoseconds'); +SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); +SELECT toIntervalNanosecond(1) < toIntervalDay(2); +SELECT toIntervalNanosecond(5) < toIntervalWeek(1); + +SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); +SELECT toIntervalNanosecond(1) > toIntervalDay(2); +SELECT toIntervalNanosecond(5) > toIntervalWeek(1); + +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing microseconds'); +SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); 
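The expected results in this test follow from the per-step multipliers in the granularity map (1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4). A short Python check of two of the boundary cases, assuming kinds are numbered 0..10 in ascending order:

```python
STEP = [1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4]  # ns->us, us->ms, ..., quarter->year
POS = {"Nanosecond": 0, "Microsecond": 1, "Millisecond": 2, "Second": 3,
       "Minute": 4, "Hour": 5, "Day": 6, "Week": 7, "Month": 8, "Quarter": 9, "Year": 10}

def factor(small: str, big: str) -> int:
    # Product of per-step multipliers between two kinds, smaller kind first.
    f = 1
    for i in range(POS[small], POS[big]):
        f *= STEP[i]
    return f

assert factor("Nanosecond", "Hour") == 3_600_000_000_000
# toIntervalNanosecond(7199999999999) < toIntervalHour(2):
assert 7_199_999_999_999 < 2 * factor("Nanosecond", "Hour")
# toIntervalSecond(1209600) = toIntervalWeek(2):
assert 1_209_600 == 2 * factor("Second", "Week")
```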
+SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); + +SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); +SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); + +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing milliseconds'); +SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); + +SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); + +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing seconds'); +SELECT toIntervalSecond(120) > toIntervalSecond(2); +SELECT toIntervalSecond(120) = toIntervalMinute(2); +SELECT toIntervalSecond(1) < toIntervalHour(2); +SELECT toIntervalSecond(86401) >= toIntervalDay(1); +SELECT toIntervalSecond(1209600) = toIntervalWeek(2); + +SELECT toIntervalSecond(120) < toIntervalSecond(2); +SELECT toIntervalSecond(120) != toIntervalMinute(2); +SELECT toIntervalSecond(1) > toIntervalHour(2); +SELECT toIntervalSecond(86401) < toIntervalDay(1); +SELECT toIntervalSecond(1209600) != toIntervalWeek(2); + +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing minutes'); +SELECT toIntervalMinute(1) < toIntervalMinute(59); +SELECT toIntervalMinute(1) < toIntervalHour(59); +SELECT toIntervalMinute(1440) = toIntervalDay(1); +SELECT toIntervalMinute(30241) > toIntervalWeek(3); + +SELECT toIntervalMinute(1) > toIntervalMinute(59); +SELECT toIntervalMinute(1) > toIntervalHour(59); +SELECT toIntervalMinute(1440) != toIntervalDay(1); +SELECT toIntervalMinute(30241) < toIntervalWeek(3); + +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing hours'); +SELECT toIntervalHour(48) > toIntervalHour(2); +SELECT toIntervalHour(48) >= toIntervalDay(2); +SELECT toIntervalHour(672) = toIntervalWeek(4); + +SELECT toIntervalHour(48) < toIntervalHour(2); +SELECT toIntervalHour(48) < toIntervalDay(2); +SELECT toIntervalHour(672) != toIntervalWeek(4); + +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing days'); +SELECT toIntervalDay(1) < toIntervalDay(23); +SELECT toIntervalDay(25) > toIntervalWeek(3); + +SELECT toIntervalDay(1) > toIntervalDay(23); +SELECT 
toIntervalDay(25) < toIntervalWeek(3); + +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing weeks'); +SELECT toIntervalWeek(1) < toIntervalWeek(6); + +SELECT toIntervalWeek(1) > toIntervalWeek(6); + +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing months'); +SELECT toIntervalMonth(1) < toIntervalMonth(3); +SELECT toIntervalMonth(124) > toIntervalQuarter(5); +SELECT toIntervalMonth(36) = toIntervalYear(3); + +SELECT toIntervalMonth(1) > toIntervalMonth(3); +SELECT toIntervalMonth(124) < toIntervalQuarter(5); +SELECT toIntervalMonth(36) != toIntervalYear(3); + +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing quarters'); +SELECT toIntervalQuarter(5) > toIntervalQuarter(4); +SELECT toIntervalQuarter(20) = toIntervalYear(5); + +SELECT toIntervalQuarter(5) < toIntervalQuarter(4); +SELECT toIntervalQuarter(20) != toIntervalYear(5); + +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing years'); +SELECT toIntervalYear(1) < toIntervalYear(3); + +SELECT toIntervalYear(1) > toIntervalYear(3); + +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From e9659626adc29d237d23e0f3ced9c8712d472a73 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:41:15 +0200 Subject: [PATCH 113/363] fix style + add docs --- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- src/DataTypes/getLeastSupertype.cpp | 2 +- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index bedbcf0bd28..be26053580b 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -53,29 +53,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Intervals with different types can’t be combined. You can’t use intervals like `4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, the interval `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`. - -You can’t perform arithmetical operations with `Interval`-type values, but you can add intervals of different types consequently to values in `Date` or `DateTime` data types. 
For example: +Also it is possible to use multiple intervals simultaneously: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -The following query causes an exception: +And to compare values with different intevals: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. +┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## See Also diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 867a6665f4b..5064391f582 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -54,29 +54,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Нельзя объединять интервалы различных типов. Нельзя использовать интервалы вида `4 DAY 1 HOUR`. Вместо этого выражайте интервал в единицах меньших или равных минимальной единице интервала, например, интервал «1 день и 1 час» можно выразить как `25 HOUR` или `90000 SECOND`. - -Арифметические операции со значениями типов `Interval` не доступны, однако можно последовательно добавлять различные интервалы к значениям типов `Date` и `DateTime`. Например: +Также можно использовать различные типы интервалов одновременно: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -Следующий запрос приведёт к генерированию исключения: +И сравнивать значения из разными интервалами: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
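The timestamps in the new docs table can be reproduced with plain date arithmetic. A quick Python confirmation, using `timedelta` (which only models the fixed-length kinds, exactly the family the example uses):

```python
from datetime import datetime, timedelta

# The docs' "current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR)" example.
current_date_time = datetime(2024, 8, 8, 18, 31, 39)
assert current_date_time + (timedelta(days=4) + timedelta(hours=3)) \
    == datetime(2024, 8, 12, 21, 31, 39)
```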
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## Смотрите также {#smotrite-takzhe} diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e05869b2df8..e16f6d5f84f 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,29 +55,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. - -你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: +也可以同時使用多個間隔: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -以下查询将导致异常: +並比較不同直數的值: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
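The header of the result table added just below compares 179999999 microseconds with 3 minutes; the inequality is easy to confirm by hand:

```python
MICROSECONDS_PER_MINUTE = 60 * 1_000_000
assert 3 * MICROSECONDS_PER_MINUTE == 180_000_000
assert 179_999_999 < 3 * MICROSECONDS_PER_MINUTE  # hence the table shows 1
```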
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0b9c744c091..674284460dc 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -252,7 +252,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if (is_higher_interval && min_granularity <= 8) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) { From 91ff9f40a2cea46d2ddf14628b16a0ddf923a3cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 21:04:10 +0200 Subject: [PATCH 114/363] Misc --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 278817b1d48..8a6fa9b7845 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -4,6 +4,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; set max_rows_to_read = '30M'; + drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; From 0ad6aa09acb72a67fc88e0cd8186afd32fefd6bf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 22:51:27 +0200 Subject: [PATCH 115/363] fix style --- docs/en/sql-reference/data-types/special-data-types/interval.md | 2 +- src/Functions/FunctionsConversion.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index be26053580b..4ef1a7e6238 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -65,7 +65,7 @@ SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVA └─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -And to compare values with different intevals: +And to compare values with different intervals: ``` sql SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0ab1858dc97..1708991af74 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1606,7 +1606,7 @@ struct ConvertImpl result_value = arguments[0].column->getInt(0) / conversion_factor; } else - { + { for (int i = from_position - 1; i >= to_position; --i) { for (const auto &entry : map) From 94efbb0bf9ab62a5399d4918e7bcfd358421a879 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:26:24 +0200 Subject: [PATCH 116/363] fix build --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 1708991af74..43ebe573582 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5279,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. - factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); From b4c553718353eb2302f85ea4d096a92036ce832c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:49:56 +0200 Subject: [PATCH 117/363] fix errorcodes in test --- .../03223_interval_data_type_comparison.sql | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 6e4862bf2d2..5d01addae45 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -21,7 +21,7 @@ SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); SELECT toIntervalNanosecond(1) > toIntervalDay(2); SELECT toIntervalNanosecond(5) > toIntervalWeek(1); -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); @@ -40,7 +40,7 @@ SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); @@ -57,7 +57,7 @@ SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); SELECT toIntervalSecond(120) > toIntervalSecond(2); @@ -72,7 +72,7 @@ SELECT toIntervalSecond(1) > toIntervalHour(2); SELECT toIntervalSecond(86401) < toIntervalDay(1); SELECT toIntervalSecond(1209600) != toIntervalWeek(2); -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); SELECT toIntervalMinute(1) < toIntervalMinute(59); @@ -85,7 +85,7 @@ SELECT toIntervalMinute(1) > toIntervalHour(59); SELECT toIntervalMinute(1440) != toIntervalDay(1); SELECT toIntervalMinute(30241) < toIntervalWeek(3); -SELECT toIntervalMinute(2) = 
toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); SELECT toIntervalHour(48) > toIntervalHour(2); @@ -96,7 +96,7 @@ SELECT toIntervalHour(48) < toIntervalHour(2); SELECT toIntervalHour(48) < toIntervalDay(2); SELECT toIntervalHour(672) != toIntervalWeek(4); -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing days'); SELECT toIntervalDay(1) < toIntervalDay(23); @@ -105,14 +105,14 @@ SELECT toIntervalDay(25) > toIntervalWeek(3); SELECT toIntervalDay(1) > toIntervalDay(23); SELECT toIntervalDay(25) < toIntervalWeek(3); -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); SELECT toIntervalWeek(1) < toIntervalWeek(6); SELECT toIntervalWeek(1) > toIntervalWeek(6); -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); SELECT toIntervalMonth(1) < toIntervalMonth(3); @@ -123,7 +123,7 @@ SELECT toIntervalMonth(1) > toIntervalMonth(3); SELECT toIntervalMonth(124) < toIntervalQuarter(5); SELECT toIntervalMonth(36) != toIntervalYear(3); -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); SELECT toIntervalQuarter(5) > toIntervalQuarter(4); @@ -132,11 +132,11 @@ SELECT toIntervalQuarter(20) = toIntervalYear(5); SELECT toIntervalQuarter(5) < toIntervalQuarter(4); SELECT toIntervalQuarter(20) != toIntervalYear(5); -SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); SELECT toIntervalYear(1) < toIntervalYear(3); SELECT toIntervalYear(1) > toIntervalYear(3); -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } From 3357275fa8c55bcc5371b4ff9c9a5d80e51ab689 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 18:33:45 +0800 Subject: [PATCH 118/363] Fix MSAN issue caused by incorrect date format. 
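The change below bounds the exception text to the bytes actually read, instead of treating a partially filled stack buffer as a NUL-terminated string. A simplified Python sketch of the behaviour the regression test (03215_fix_datetime_implicit_conversion, added later in this series) relies on; the real parser is positional, a regex only approximates its fixed-width expectations:

```python
import re

WELL_FORMED = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")

def parse_or_report(s: str) -> str:
    if WELL_FORMED.fullmatch(s):
        return "ok"
    # After the fix, the message echoes only the bytes actually read,
    # never past the end of the partially filled buffer.
    return f"Cannot parse time component of DateTime {s}"

assert parse_or_report("2024-08-06 09:58:09") == "ok"
assert parse_or_report("2024-08-06 09:58:0").endswith("09:58:0")
```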
--- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index c771fced73a..dd4aef23a25 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); else return false; } From 35f19522e745ef2267b4c6f99dfc5d7c1f7e78c3 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:56:14 +0200 Subject: [PATCH 119/363] fix fuzzer --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 43ebe573582..c25bc44450f 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1579,7 +1579,7 @@ struct ConvertImpl IntervalKind to = typeid_cast(result_type.get())->getKind(); IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); - if (from == to) + if (from == to || arguments[0].column->empty()) return arguments[0].column; const auto &map = getGranularityMap(); From ca4041847e4aa8acccd6ea31c0a18f2160c0dc7a Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 19:15:41 +0800 Subject: [PATCH 120/363] Add tests --- src/IO/ReadHelpers.cpp | 4 ++-- ...215_fix_datetime_implicit_conversion.reference | 1 + .../03215_fix_datetime_implicit_conversion.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index dd4aef23a25..e69b4187b37 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1402,7 +1402,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); else return false; } @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); else return false; } diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql new file mode 100644 index 00000000000..70a8a3432a6 --- 
/dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS tab SYNC; + +CREATE TABLE tab +( + a DateTime, + pk String +) Engine = MergeTree() ORDER BY pk; + +INSERT INTO tab select cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 1; + +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'; +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } +SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'; -- { serverError TYPE_MISMATCH } + +DROP TABLE IF EXISTS tab SYNC; From 6ded5e1c8b994ad2332468e605b17a74e8d5675f Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 23:50:03 +0800 Subject: [PATCH 121/363] Some fixups --- src/IO/ReadHelpers.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e69b4187b37..b484f80250d 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1399,10 +1399,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D size_t size = buf.read(s_pos, remaining_date_size); if (size != remaining_date_size) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", std::string_view(s, already_read_length + size)); else return false; } @@ -1429,10 +1427,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (size != time_broken_down_length) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", std::string_view(s, size)); else return false; } From a3d8db6e1eb27d6a8fa81bbf43c8ffb171714c0b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 19:05:37 +0200 Subject: [PATCH 122/363] updates due to review --- .../data-types/special-data-types/interval.md | 21 +++++++------- src/DataTypes/getLeastSupertype.cpp | 13 ++++----- src/DataTypes/getLeastSupertype.h | 21 ++------------ src/Functions/FunctionsConversion.cpp | 28 +++---------------- 4 files changed, 24 insertions(+), 59 deletions(-) diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e16f6d5f84f..e05869b2df8 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,28 +55,29 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -也可以同時使用多個間隔: +不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. 
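The figures in the restored paragraph check out: "1 day and an hour" in the two suggested units is a one-line computation.

```python
# "1 day and an hour" from the restored docs paragraph, in the suggested units.
assert 24 + 1 == 25               # 25 HOUR
assert (24 + 1) * 3600 == 90_000  # 90000 SECOND
```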
+ +你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: ``` sql -SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` ``` text -┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ -│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ -└─────────────────────┴────────────────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ +│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ +└─────────────────────┴────────────────────────────────────────────────────────┘ ``` -並比較不同直數的值: +以下查询将导致异常: ``` sql -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ -│ 1 │ -└─────────────────────────────────────────────────────────────┘ +Received exception from server (version 19.14.1): +Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 674284460dc..8bcec49815f 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -230,8 +230,7 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) { - const auto& granularity_map = getGranularityMap(); - int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + auto min_interval = IntervalKind::Kind::Year; DataTypePtr smallest_type; bool is_higher_interval = false; // For Years, Quarters and Months @@ -240,18 +239,18 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & { if (const auto * interval_type = typeid_cast(type.get())) { - int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); - if (current_granularity > 8) + auto current_interval = interval_type->getKind().kind; + if (current_interval > IntervalKind::Kind::Week) is_higher_interval = true; - if (current_granularity < min_granularity) + if (current_interval < min_interval) { - min_granularity = current_granularity; + min_interval = current_interval; smallest_type = type; } } } - if (is_higher_interval && min_granularity <= 8) + if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index c584eb83011..5ea2b6417b2 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -50,24 +50,9 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); -/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval -inline const std::unordered_map> & getGranularityMap() -{ - static std::unordered_map> granularity_map = - { - {IntervalKind::Kind::Nanosecond, {1, 1000}}, - 
{IntervalKind::Kind::Microsecond, {2, 1000}}, - {IntervalKind::Kind::Millisecond, {3, 1000}}, - {IntervalKind::Kind::Second, {4, 60}}, - {IntervalKind::Kind::Minute, {5, 60}}, - {IntervalKind::Kind::Hour, {6, 24}}, - {IntervalKind::Kind::Day, {7, 7}}, - {IntervalKind::Kind::Week, {8, 4}}, - {IntervalKind::Kind::Month, {9, 3}}, - {IntervalKind::Kind::Quarter, {10, 4}}, - {IntervalKind::Kind::Year, {11, 1}} - }; - return granularity_map; +/// A vector that shows the conversion rates to the next Interval type starting from NanoSecond +static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; + } } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index c25bc44450f..25c6bbcbfef 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1582,42 +1582,22 @@ struct ConvertImpl if (from == to || arguments[0].column->empty()) return arguments[0].column; - const auto &map = getGranularityMap(); Int64 conversion_factor = 1; Int64 result_value; - int from_position = map.at(from).first; - int to_position = map.at(to).first; // Positions of each interval according to granurality map + int from_position = static_cast(from.kind); + int to_position = static_cast(to.kind); // Positions of each interval according to granurality map if (from_position < to_position) { for (int i = from_position - 1; i <= to_position; ++i) - { - // Find the kind that matches this position - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { for (int i = from_position - 1; i >= to_position; --i) - { - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } From a25accdae3cf420beaeb8ca25a9ac32070c397b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 20:20:09 +0200 Subject: [PATCH 123/363] Fix a test --- programs/client/Client.cpp | 3 +++ src/Client/ClientBase.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1d99d223ee9..631914dee5c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1164,6 +1164,9 @@ void Client::processOptions(const OptionsDescription & options_description, /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) client_context = global_context; initClientContext(); + + /// Allow to pass-through unknown settings to the server. + client_context->getAccessControl().allowAllSettings(); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a00a9499237..473db8e9678 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -689,9 +689,6 @@ void ClientBase::initClientContext() client_context->setQueryKindInitial(); client_context->setQueryKind(query_kind); client_context->setQueryParameters(query_parameters); - - /// Allow to pass-through unknown settings to the server. 
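Moving `allowAllSettings()` from ClientBase into Client limits the relaxation to clickhouse-client, which forwards unknown settings to the server as-is, while other ClientBase users keep strict checking, as far as the diff shows. A toy Python model of the two modes; the class and setting names are invented for illustration, not the real AccessControl API:

```python
class AccessControl:
    """Toy model of strict vs pass-through setting checks."""
    def __init__(self) -> None:
        self.allow_all_settings = False

    def check(self, name: str, known: set) -> None:
        if name not in known and not self.allow_all_settings:
            raise KeyError(f"Unknown setting '{name}'")

KNOWN = {"max_threads", "max_memory_usage"}
local_side = AccessControl()            # clickhouse-local: stays strict
client_side = AccessControl()
client_side.allow_all_settings = True   # what Client.cpp now opts into

client_side.check("some_future_setting", KNOWN)  # forwarded to the server as-is
try:
    local_side.check("some_future_setting", KNOWN)
except KeyError:
    pass  # strict contexts still reject unknown names
```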
- client_context->getAccessControl().allowAllSettings(); } bool ClientBase::isRegularFile(int fd) From 384aedccaeece56456ad1e5ea17a8da4f56a69a4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:09:50 +0200 Subject: [PATCH 124/363] Update getLeastSupertype.h --- src/DataTypes/getLeastSupertype.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 5ea2b6417b2..8dd1685e6e9 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -54,5 +54,3 @@ DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; } - -} From c716315c3fdfd57719daf0f7f42b786afe6e68af Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 00:20:46 +0200 Subject: [PATCH 125/363] Annotations --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8db91904a6a..c8a243d9b27 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long DROP TABLE IF EXISTS group_uniq_str; CREATE TABLE group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; From 375de7ff6523a23fb7b898725a53004f24d047cd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 07:15:10 +0200 Subject: [PATCH 126/363] ci: add more logs in the functional tests reports Due to settings randomization 4096 is not enough even to show all settings, like here [1]. 
[1]: https://s3.amazonaws.com/clickhouse-test-reports/68139/c852bd9dbaa317423234d4f15f21d64e59be42b5/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin --- docker/test/util/process_functional_tests_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index aa2ea686c46..ec9e14b1430 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -116,7 +116,7 @@ def process_test_log(log_path, broken_tests): test[0], test[1], test[2], - "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), + "".join(test[3])[:8192].replace("\t", "\\t").replace("\n", "\\n"), ] for test in test_results ] From ece707c4436ab65fcb142f0eaae72f7eb2c3d8db Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 19:43:29 +0200 Subject: [PATCH 127/363] Better test for Not-ready Set is passed in system.* tables - system.distribution_queue - system.replication_queue - system.rocksdb - system.databases - system.mutations - test for system.part_moves_between_shards will not be provided since it is a likely deprecated feature and the test requires some code (I've fixed it differently from #66018, but it does not make sense anymore, so I'm submitting only the test) --- ...3223_system_tables_set_not_ready.reference | 5 ++++ .../03223_system_tables_set_not_ready.sql | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.reference create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.sql diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference new file mode 100644 index 00000000000..e39523ed4f5 --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference @@ -0,0 +1,5 @@ +system.distribution_queue 1 +system.rocksdb 1 +system.databases 1 +system.mutations 1 +system.replication_queue 1 diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql new file mode 100644 index 00000000000..907fa47143c --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest +-- Tag no-fasttest -- due to EmbeddedRocksDB + +drop table if exists null; +drop table if exists dist; +create table null as system.one engine=Null; +create table dist as null engine=Distributed(test_cluster_two_shards, currentDatabase(), 'null', rand()); +insert into dist settings prefer_localhost_replica=0 values (1); +select 'system.distribution_queue', count() from system.distribution_queue where exists(select 1) and database = currentDatabase(); + +drop table if exists rocksdb; +create table rocksdb (key Int) engine=EmbeddedRocksDB() primary key key; +insert into rocksdb values (1); +select 'system.rocksdb', count()>0 from system.rocksdb where exists(select 1) and database = currentDatabase(); + +select 'system.databases', count() from system.databases where exists(select 1) and database = currentDatabase(); + +drop table if exists mt; +create table mt (key Int) engine=MergeTree() order by key; +alter table mt delete where 1; +select 'system.mutations', count() from system.mutations where exists(select 1) and database = currentDatabase(); + +drop table if exists rep1; +drop 
table if exists rep2; +create table rep1 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +create table rep2 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +system stop fetches rep2; +insert into rep1 values (1); +system sync replica rep2 pull; +select 'system.replication_queue', count()>0 from system.replication_queue where exists(select 1) and database = currentDatabase(); From a41c1305887d08f43c02e354bb307f69a16b3fb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 06:05:44 +0200 Subject: [PATCH 128/363] Update 02675_profile_events_from_query_log_and_client.sh --- .../02675_profile_events_from_query_log_and_client.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index 894b2b61563..ff534a6a2e6 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-merge-tree-settings # Tag no-fasttest: needs s3 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 1cc845726842f388c4524d55b248f210e28d979d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:57:47 +0200 Subject: [PATCH 129/363] fix reviews, fix crash in fuzzer --- src/DataTypes/getLeastSupertype.cpp | 2 +- src/DataTypes/getLeastSupertype.h | 2 +- src/Functions/FunctionsConversion.cpp | 11 +- .../03223_interval_data_type_comparison.sql | 198 +++++++++--------- 4 files changed, 106 insertions(+), 107 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 8bcec49815f..65df529e78b 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -251,7 +251,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) - throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the number of days in a month is not fixed", types[0]->getName(), types[1]->getName()); if (smallest_type) { diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 8dd1685e6e9..55d8e8fff0d 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -51,6 +51,6 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); /// A vector that shows the conversion rates to the next Interval type starting from NanoSecond -static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; +static std::vector interval_conversions = {1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4}; } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 25c6bbcbfef..b6102cb7ecf 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1586,17 +1586,17 @@ struct ConvertImpl Int64 result_value; int from_position = static_cast(from.kind); - int to_position = 
static_cast(to.kind); // Positions of each interval according to granurality map + int to_position = static_cast(to.kind); /// Positions of each interval according to granularity map if (from_position < to_position) { - for (int i = from_position - 1; i <= to_position; ++i) + for (int i = from_position; i < to_position; ++i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { - for (int i = from_position - 1; i >= to_position; --i) + for (int i = from_position; i > to_position; --i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } @@ -2366,9 +2366,8 @@ private: } if constexpr (std::is_same_v) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); - } + if (WhichDataType(from_type).isInterval()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } if (!done) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 5d01addae45..77b6e2fa3dc 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -1,142 +1,142 @@ SELECT('Comparing nanoseconds'); -SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); -SELECT toIntervalNanosecond(1) < toIntervalDay(2); -SELECT toIntervalNanosecond(5) < toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND > INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND = INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND >= INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND = INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND < INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); -SELECT toIntervalNanosecond(1) > toIntervalDay(2); -SELECT toIntervalNanosecond(5) > toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND < INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND > INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND != INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND < INTERVAL 1 MILLISECOND; 
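-- Editor's aside, not part of the patch: how these comparisons are resolved.
-- The common supertype is the finer of the two units; the coarser operand is
-- multiplied through the per-step rates (ns->us->ms->s->min->h->day->week are
-- 1000, 1000, 1000, 60, 60, 24, 7). E.g. 1 MILLISECOND becomes 1000000 ns, so
-- `1000001 NANOSECOND < 1 MILLISECOND` above yields 0. Units of MONTH and
-- coarser have no fixed length in days, hence the NO_COMMON_TYPE errors in the
-- cases below, e.g. `SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH;`.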
+SELECT INTERVAL 2000000001 NANOSECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND != INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND > INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); -SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND < INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND = INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND = INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND > INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND != INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND != INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 1 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); -SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND > INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND > INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND = INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND < INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) > 
toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND < INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND < INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND != INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND > INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MILLISECOND < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); -SELECT toIntervalSecond(120) > toIntervalSecond(2); -SELECT toIntervalSecond(120) = toIntervalMinute(2); -SELECT toIntervalSecond(1) < toIntervalHour(2); -SELECT toIntervalSecond(86401) >= toIntervalDay(1); -SELECT toIntervalSecond(1209600) = toIntervalWeek(2); +SELECT INTERVAL 120 SECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND = INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND >= INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND = INTERVAL 2 WEEK; -SELECT toIntervalSecond(120) < toIntervalSecond(2); -SELECT toIntervalSecond(120) != toIntervalMinute(2); -SELECT toIntervalSecond(1) > toIntervalHour(2); -SELECT toIntervalSecond(86401) < toIntervalDay(1); -SELECT toIntervalSecond(1209600) != toIntervalWeek(2); +SELECT INTERVAL 120 SECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND != INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND < INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND != INTERVAL 2 WEEK; -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 SECOND < INTERVAL 1 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); -SELECT toIntervalMinute(1) < toIntervalMinute(59); -SELECT toIntervalMinute(1) < toIntervalHour(59); -SELECT toIntervalMinute(1440) = toIntervalDay(1); -SELECT toIntervalMinute(30241) > toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE < INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE < INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE = INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE > INTERVAL 3 WEEK; -SELECT toIntervalMinute(1) > toIntervalMinute(59); -SELECT toIntervalMinute(1) > toIntervalHour(59); -SELECT toIntervalMinute(1440) != toIntervalDay(1); -SELECT toIntervalMinute(30241) < toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE > INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE > INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE != INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE < INTERVAL 3 WEEK; -SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 MINUTE = INTERVAL 120 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); -SELECT toIntervalHour(48) > toIntervalHour(2); -SELECT toIntervalHour(48) >= toIntervalDay(2); -SELECT toIntervalHour(672) = toIntervalWeek(4); +SELECT INTERVAL 48 HOUR > INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR >= INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR = INTERVAL 4 WEEK; -SELECT toIntervalHour(48) < toIntervalHour(2); -SELECT toIntervalHour(48) < toIntervalDay(2); -SELECT toIntervalHour(672) != toIntervalWeek(4); +SELECT INTERVAL 48 HOUR < INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR < INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR != INTERVAL 4 WEEK; -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 HOUR < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing 
days'); -SELECT toIntervalDay(1) < toIntervalDay(23); -SELECT toIntervalDay(25) > toIntervalWeek(3); +SELECT INTERVAL 1 DAY < INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY > INTERVAL 3 WEEK; -SELECT toIntervalDay(1) > toIntervalDay(23); -SELECT toIntervalDay(25) < toIntervalWeek(3); +SELECT INTERVAL 1 DAY > INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY < INTERVAL 3 WEEK; -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); -SELECT toIntervalWeek(1) < toIntervalWeek(6); +SELECT INTERVAL 1 WEEK < INTERVAL 6 WEEK; -SELECT toIntervalWeek(1) > toIntervalWeek(6); +SELECT INTERVAL 1 WEEK > INTERVAL 6 WEEK; -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 124 WEEK > INTERVAL 8 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); -SELECT toIntervalMonth(1) < toIntervalMonth(3); -SELECT toIntervalMonth(124) > toIntervalQuarter(5); -SELECT toIntervalMonth(36) = toIntervalYear(3); +SELECT INTERVAL 1 MONTH < INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH > INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH = INTERVAL 3 YEAR; -SELECT toIntervalMonth(1) > toIntervalMonth(3); -SELECT toIntervalMonth(124) < toIntervalQuarter(5); -SELECT toIntervalMonth(36) != toIntervalYear(3); +SELECT INTERVAL 1 MONTH > INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH < INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH != INTERVAL 3 YEAR; -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 6 MONTH = INTERVAL 26 MICROSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); -SELECT toIntervalQuarter(5) > toIntervalQuarter(4); -SELECT toIntervalQuarter(20) = toIntervalYear(5); +SELECT INTERVAL 5 QUARTER > INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER = INTERVAL 5 YEAR; -SELECT toIntervalQuarter(5) < toIntervalQuarter(4); -SELECT toIntervalQuarter(20) != toIntervalYear(5); +SELECT INTERVAL 5 QUARTER < INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER != INTERVAL 5 YEAR; -SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 QUARTER = INTERVAL 6 NANOSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); -SELECT toIntervalYear(1) < toIntervalYear(3); +SELECT INTERVAL 1 YEAR < INTERVAL 3 YEAR; -SELECT toIntervalYear(1) > toIntervalYear(3); +SELECT INTERVAL 1 YEAR > INTERVAL 3 YEAR; -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 YEAR = INTERVAL 8 SECOND; -- { serverError NO_COMMON_TYPE } \ No newline at end of file From 9c7d9a6a8d96b88c56aaa95b691f2b9bf79cf8d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:57:03 +0200 Subject: [PATCH 130/363] Annotations --- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 826bd483fe9..b4687541360 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -1,3 +1,5 @@ +-- Tags: long + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5) FROM ( SELECT studentTTest(sample, variant) as result From 1767ec6b4ca0fc0e8546e705e0d0dff3ffa797cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:55:01 +0200 Subject: [PATCH 131/363] 
Debug test --- .../0_stateless/02490_benchmark_max_consecutive_errors.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index f747b3156a5..df7e9386662 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ] then echo "$RES" else + echo "$RES" cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" fi From 4c043301e6dde6b0c83394d6721e112c9c7bf4ce Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 13 Aug 2024 10:30:31 +0200 Subject: [PATCH 132/363] Avoid ignoring errors of execute_process() (set COMMAND_ERROR_IS_FATAL=ANY) This will fix with issues like this [1]: Aug 12 09:58:44 '/usr/bin/cmake' '--build' '/build/build_docker/native' '--target' 'pre_compressor' Aug 12 09:58:44 sccache: error: Server startup failed: cache storage failed to read: Unexpected (temporary) at stat Aug 12 09:58:45 ninja: build stopped: subcommand failed. Aug 12 09:58:45 -- Configuring done (77.7s) Aug 12 09:58:47 -- Generating done (1.8s) Aug 12 09:58:47 -- Build files have been written to: /build/build_docker So as you can see even if ninja fails it still wrote build files, while it should fail. [1]: https://s3.amazonaws.com/clickhouse-test-reports/64955/0af41e32a5822d25ac3760f1ebb2313557474701/builds/report.html [2]: https://s3.amazonaws.com/clickhouse-builds/PRs/64955/0af41e32a5822d25ac3760f1ebb2313557474701/binary_darwin_aarch64/build_log.log Note, COMMAND_ERROR_IS_FATAL is 3.19+, and the requirement for now is 3.20 Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 12 ++++++-- PreLoad.cmake | 10 +++++-- cmake/freebsd/default_libs.cmake | 12 ++++++-- cmake/linux/default_libs.cmake | 6 +++- cmake/tools.cmake | 6 +++- cmake/utils.cmake | 5 +++- contrib/cctz-cmake/CMakeLists.txt | 4 ++- contrib/google-protobuf-cmake/CMakeLists.txt | 12 ++++++-- contrib/grpc-cmake/CMakeLists.txt | 30 +++++++++++++------ .../completions/CMakeLists.txt | 1 + 10 files changed, 75 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4e0484ab1..8e2302e6c52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -609,7 +609,9 @@ if (NATIVE_BUILD_TARGETS execute_process( COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -621,9 +623,13 @@ if (NATIVE_BUILD_TARGETS "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${PROJECT_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () diff --git a/PreLoad.cmake b/PreLoad.cmake index e0fd37b2fd6..92b221c9f63 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -51,8 +51,14 @@ if (NOT "$ENV{CFLAGS}" STREQUAL "" endif() # Default toolchain - this is needed to avoid dependency on OS files. 
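# Editor's aside, not part of the patch: the rule applied throughout this commit
# is that every probe whose output feeds the build must abort the configure step
# when it fails, instead of leaving a half-written build tree behind. A minimal
# example of the pattern, assuming CMake >= 3.19 where COMMAND_ERROR_IS_FATAL
# was introduced (the commit message above notes the project already requires 3.20):
#
#   execute_process(COMMAND uname -r
#       OUTPUT_VARIABLE KERNEL_RELEASE
#       OUTPUT_STRIP_TRAILING_WHITESPACE
#       COMMAND_ERROR_IS_FATAL ANY)   # any failure stops cmake right here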
-execute_process(COMMAND uname -s OUTPUT_VARIABLE OS) -execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH) +execute_process(COMMAND uname -s + OUTPUT_VARIABLE OS + COMMAND_ERROR_IS_FATAL ANY +) +execute_process(COMMAND uname -m + OUTPUT_VARIABLE ARCH + COMMAND_ERROR_IS_FATAL ANY +) # By default, prefer clang on Linux # But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER. diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 6bde75f8c9a..3f5b3829877 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -9,10 +9,18 @@ endif () file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") message(STATUS "-Bprefix: ${bprefix}") -execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") message(FATAL_ERROR "libclang_rt.builtins had not been found") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 4a06243243e..51620bc9f33 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -5,7 +5,11 @@ set (DEFAULT_LIBS "-nodefaultlibs") # We need builtins from Clang's RT even without libcxx - for ubsan+int128. # See https://bugs.llvm.org/show_bug.cgi?id=16404 -execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process (COMMAND + ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. 
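# Editor's aside (hypothetical refactoring, not in this patch): with the same
# keywords now repeated at every call site, the probes could share one
# fatal-by-default wrapper, e.g.:
#
#   function (run_and_capture out)
#       execute_process(COMMAND ${ARGN}
#           OUTPUT_VARIABLE result
#           OUTPUT_STRIP_TRAILING_WHITESPACE
#           COMMAND_ERROR_IS_FATAL ANY)
#       set(${out} "${result}" PARENT_SCOPE)
#   endfunction()
#
#   run_and_capture(BUILTINS_LIBRARY ${CMAKE_CXX_COMPILER} --print-libgcc-file-name --rtlib=compiler-rt)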
if (SANITIZE STREQUAL undefined) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 7aa5d4c51ce..5c7da54b779 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -5,7 +5,11 @@ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") endif () # Print details to output -execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE +) message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a318408098a..a99d8e050a8 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -90,7 +90,10 @@ endfunction() # Function get_cmake_properties returns list of all propreties that cmake supports function(get_cmake_properties outvar) - execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties) + execute_process(COMMAND cmake --help-property-list + OUTPUT_VARIABLE cmake_properties + COMMAND_ERROR_IS_FATAL ANY + ) # Convert command output into a CMake list string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}") string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}") diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 7161f743de1..fadf948b053 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -37,7 +37,9 @@ message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE TIMEZONES) + OUTPUT_VARIABLE TIMEZONES + COMMAND_ERROR_IS_FATAL ANY +) file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") file(APPEND ${TIMEZONES_FILE} "#include \n") diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index e44f737cfc3..f1a744f851f 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -359,7 +359,9 @@ else () execute_process( COMMAND mkdir -p ${PROTOC_BUILD_DIR} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -375,11 +377,15 @@ else () "-DABSL_ENABLE_INSTALL=0" "${protobuf_source_dir}" WORKING_DIRECTORY "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () add_executable(protoc IMPORTED GLOBAL) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 1c0bf41ff78..975774d1990 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -51,8 +51,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") execute_process( - COMMAND mkdir -p ${OPENSSL_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") @@ -89,15 +90,21 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL 
CMAKE_SYSTEM_NAME "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" "${OPENSSL_SOURCE_DIR}" WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) # It's not important on which file we depend, we just want to specify right order add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) @@ -108,8 +115,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") execute_process( - COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") @@ -140,11 +148,15 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DgRPC_SSL_PROVIDER=package" "${_gRPC_SOURCE_DIR}" WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) add_executable(grpc_cpp_plugin IMPORTED GLOBAL) set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") diff --git a/programs/bash-completion/completions/CMakeLists.txt b/programs/bash-completion/completions/CMakeLists.txt index d364e07ef6e..2e911e81981 100644 --- a/programs/bash-completion/completions/CMakeLists.txt +++ b/programs/bash-completion/completions/CMakeLists.txt @@ -6,6 +6,7 @@ macro(configure_bash_completion) COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion OUTPUT_VARIABLE ${out} OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY ) endif() string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}") From 7f005a6ca48d4f193470d3a71bc1d97ff55f4a2f Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Tue, 13 Aug 2024 08:38:30 +0000 Subject: [PATCH 133/363] Fix small value DateTime64 constant folding in nested query --- src/Analyzer/ConstantNode.cpp | 10 ++++- ...222_datetime64_small_value_const.reference | 18 +++++++++ .../03222_datetime64_small_value_const.sql | 39 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.reference create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.sql diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index c65090f5b55..3d0f448da4b 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -177,9 +177,15 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. 
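  * For example (an illustrative case; compare the test added below): a folded
  * subquery constant such as (select toDateTime64(5, 3)) is printed back as a
  * date-time string along the lines of '1970-01-01 00:00:05' rather than as the
  * Decimal64 value 5, so a remote server re-parses it with the intended scale.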
*/ - if (WhichDataType(constant_value_type->getTypeId()).isDateTime64()) + if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || + (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) { - const auto * date_time_type = typeid_cast(constant_value_type.get()); + const DataTypeDateTime64 * date_time_type = nullptr; + if (WhichDataType(constant_value_type->getTypeId()).isNullable()) + date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); + else + date_time_type = typeid_cast(constant_value_type.get()); + DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.reference b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference new file mode 100644 index 00000000000..ae36c08acc5 --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference @@ -0,0 +1,18 @@ +0 1970-01-01 00:00:00.000 +0 1970-01-01 00:00:05.000 +0 1970-01-01 00:45:25.456789 +0 1970-01-01 00:53:25.456789123 +0 \N +1 1970-01-01 00:00:00.000 +5 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +4 1970-01-01 00:10:05.000 +4 1970-01-01 00:10:05.000 +1 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +5 1970-01-01 00:00:00.000 +0 +0 +5 diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql new file mode 100644 index 00000000000..6999ba9662a --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -0,0 +1,39 @@ +-- Tags: shard + +select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:45:25.456789', 6)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:53:25.456789123', 9)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(null,3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); + +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); +insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); +insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); +insert into shard_1.dt64_03222 values(5, toDateTime64(0,3)); + +--Output : 1,5 2,3,4 4 1,2,3,5 0 0 5 +select id, dt from distr_03222_dt64 where dt = (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select 
toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64('1970-01-01 00:10:00.000',3)) order by id; +select id, dt from distr_03222_dt64 where dt < (select toDateTime64(5,3)) order by id; + +select count(*) from distr_03222_dt64 where dt > (select toDateTime64('2024-07-20 00:00:00',3)); +select count(*) from distr_03222_dt64 where dt > (select now()); +select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-20 00:00:00',3)); + + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; From e46c5a75ef6b5488834add56bea4cab327515bfb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 13 Aug 2024 17:59:05 +0800 Subject: [PATCH 134/363] fix building issue --- src/Functions/{FunctionOverlay.cpp => overlay.cpp} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/Functions/{FunctionOverlay.cpp => overlay.cpp} (99%) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/overlay.cpp similarity index 99% rename from src/Functions/FunctionOverlay.cpp rename to src/Functions/overlay.cpp index 61d2df88ab1..094da27a71d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/overlay.cpp @@ -728,8 +728,8 @@ private: REGISTER_FUNCTION(Overlay) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Sensitive); } } From 0414cdbbbf32efe10a92c9dd93ba47743ceeb848 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:58:49 +0200 Subject: [PATCH 135/363] Fix unpack error --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5946e561949..5bde4686d3a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1995,7 +1995,7 @@ class TestSuite: tag_line = find_tag_line(file) next_line = file.readline() except UnicodeDecodeError: - return [] + return [], {} try: if filepath.endswith(".sql"): for line in file: From 69893aaa25e3c459f3480955534e05456e7aaa64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 13 Aug 2024 16:19:25 +0200 Subject: [PATCH 136/363] Lower memory usage --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..874095e39dc 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' 
FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? From 6dfed409f460311f133e30e70f839f9865d71861 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 16:09:45 +0000 Subject: [PATCH 137/363] Fix seraching for query params --- tests/clickhouse-test | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5bde4686d3a..515b519af3e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1978,13 +1978,20 @@ class TestSuite: def is_shebang(line: str) -> bool: return line.startswith("#!") - def find_tag_line(file): - line = file.readline() - while line != "": - line = line.strip() - if line and not is_shebang(line): + def find_tag_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Tags:"): + return line + return "" + + def find_random_settings_limits_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Random settings limits:"): return line - line = file.readline() return "" def load_tags_and_random_settings_limits_from_file(filepath): @@ -1992,13 +1999,16 @@ class TestSuite: need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - tag_line = find_tag_line(file) - next_line = file.readline() + lines = file.readlines() + tag_line = find_tag_line(lines, comment_sign) + random_settings_limits_line = find_random_settings_limits_line( + lines, comment_sign + ) except UnicodeDecodeError: return [], {} try: if filepath.endswith(".sql"): - for line in file: + for line in lines: if "{CLICKHOUSE_DATABASE" in line: need_query_params = True except UnicodeDecodeError: @@ -2006,7 +2016,6 @@ class TestSuite: parsed_tags = parse_tags_from_line(tag_line, comment_sign) if need_query_params: parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line random_settings_limits = parse_random_settings_limits_from_line( random_settings_limits_line, comment_sign ) @@ -2068,9 +2077,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( - all_tags_and_random_settings_limits[1] - ) + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From 0abca8b7ddbafa37da5b1196b21fb816999fd334 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 14 Aug 2024 09:57:59 +0800 Subject: [PATCH 138/363] fix doc --- .../sql-reference/functions/string-replace-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 1caa6215b6b..d086c9ee64b 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -263,8 +263,8 @@ overlay(s, replace, position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. 
An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** @@ -300,8 +300,8 @@ overlayUTF8(s, replace, position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** From da1c98a771c5b6cd74b3e1ba00fd4e01574489e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 14 Aug 2024 04:54:33 +0200 Subject: [PATCH 139/363] Update the limits --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 874095e39dc..2e6e7bbebe5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? From f740cf4eaa71621fb518c6d5668e8356f452a979 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Aug 2024 09:54:03 +0200 Subject: [PATCH 140/363] Fix data race on SampleKey --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a6ef0063069..6efd3a5c97f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -369,7 +369,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. 
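    /// Editor's aside, not part of the patch: the removed line below handed every
    /// query the one AST owned by the shared metadata snapshot; when a query later
    /// modifies its sampling expression, two concurrent queries race on the same
    /// node. The fix keeps the final-branch value and otherwise takes a private
    /// copy:
    ///     sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone();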
- ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); + ASTPtr sampling_key_ast; if (final) { @@ -377,6 +377,12 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// We do spoil available_real_columns here, but it is not used later. available_real_columns.emplace_back(sampling_key.column_names[0], std::move(sampling_column_type)); } + else + { + sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone(); + } + + chassert(sampling_key_ast != nullptr); if (has_lower_limit) { From 28b0aad3f9e54beed27ee384ab81312233abaa84 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:16:34 +0200 Subject: [PATCH 141/363] Fix python style --- tests/clickhouse-test | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 515b519af3e..8c2da7334d4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1975,9 +1975,6 @@ class TestSuite: ) return random_settings_limits - def is_shebang(line: str) -> bool: - return line.startswith("#!") - def find_tag_line(lines, comment_sign): for line in lines: if line.startswith(comment_sign) and line[ @@ -2077,9 +2074,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From 844cdd8937cce17060ea8b54fdfc2428d3015f44 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Aug 2024 20:38:09 +0200 Subject: [PATCH 142/363] update toInterval functions --- .../functions/type-conversion-functions.md | 400 +++++++++++++++++- 1 file changed, 380 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1e618b8cdab..cd6fd9ab839 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4866,30 +4866,23 @@ Result: └───────┴───────────────┴──────┴──────────────┴──────────────┴──────────────────────┘ ``` -## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) +## toIntervalYear -Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. +Returns an interval of `n` years of data type [IntervalYear](../data-types/special-data-types/interval.md). **Syntax** ``` sql -toIntervalSecond(number) -toIntervalMinute(number) -toIntervalHour(number) -toIntervalDay(number) -toIntervalWeek(number) -toIntervalMonth(number) -toIntervalQuarter(number) -toIntervalYear(number) +toIntervalYear(n) ``` **Arguments** -- `number` — Duration of interval. Positive integer number. +- `n` — Number of years. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** -- The value in `Interval` data type. +- Interval of `n` years. [IntervalYear](../data-types/special-data-types/interval.md). 
**Example** @@ -4898,19 +4891,386 @@ Query: ``` sql WITH toDate('2019-01-01') AS date, - INTERVAL 1 WEEK AS interval_week, - toIntervalWeek(1) AS interval_to_week -SELECT - date + interval_week, - date + interval_to_week; + toIntervalYear(1) AS interval_to_year +SELECT date + interval_to_year ``` Result: ```response -┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ -│ 2019-01-08 │ 2019-01-08 │ -└───────────────────────────┴──────────────────────────────┘ +┌─plus(date, interval_to_year)─┐ +│ 2020-01-01 │ +└──────────────────────────────┘ +``` + +## toIntervalQuarter + +Returns an interval of `n` quarters of data type [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalQuarter(n) +``` + +**Arguments** + +- `n` — Number of quarters. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` quarters. [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalQuarter(1) AS interval_to_quarter +SELECT date + interval_to_quarter +``` + +Result: + +```response +┌─plus(date, interval_to_quarter)─┐ +│ 2019-04-01 │ +└─────────────────────────────────┘ +``` + +## toIntervalMonth + +Returns an interval of `n` months of data type [IntervalMonth](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMonth(n) +``` + +**Arguments** + +- `n` — Number of m. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` months. [IntervalMonth](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalMonth(1) AS interval_to_month +SELECT date + interval_to_month +``` + +Result: + +```response +┌─plus(date, interval_to_month)─┐ +│ 2019-02-01 │ +└───────────────────────────────┘ +``` + +## toIntervalWeek + +Returns an interval of `n` weeks of data type [IntervalWeek](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalWeek(n) +``` + +**Arguments** + +- `n` — Number of weeks. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` weeks. [IntervalWeek](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalWeek(1) AS interval_to_week +SELECT date + interval_to_week +``` + +Result: + +```response +┌─plus(date, interval_to_week)─┐ +│ 2019-01-08 │ +└──────────────────────────────┘ +``` + +## toIntervalDay + +Returns an interval of `n` days of data type [IntervalDay](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalDay(n) +``` + +**Arguments** + +- `n` — Number of days. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` days. [IntervalDay](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalDay(5) AS interval_to_days +SELECT date + interval_to_days +``` + +Result: + +```response +┌─plus(date, interval_to_days)─┐ +│ 2019-01-06 │ +└──────────────────────────────┘ +``` + +## toIntervalHour + +Returns an interval of `n` hours of data type [IntervalHour](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalHour(n) +``` + +**Arguments** + +- `n` — Number of hours. Positive integer number. [Int*](../data-types/int-uint.md). 
+ +**Returned values** + +- Interval of `n` hours. [IntervalHour](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalHour(12) AS interval_to_hours +SELECT date + interval_to_hours +``` + +Result: + +```response +┌─plus(date, interval_to_hours)─┐ +│ 2019-01-01 12:00:00 │ +└───────────────────────────────┘ +``` + +## toIntervalMinute + +Returns an interval of `n` minutes of data type [IntervalMinute](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMinute(n) +``` + +**Arguments** + +- `n` — Number of minutes. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` minutes. [IntervalMinute](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalMinute(12) AS interval_to_minutes +SELECT date + interval_to_minutes +``` + +Result: + +```response +┌─plus(date, interval_to_minutes)─┐ +│ 2019-01-01 00:12:00 │ +└─────────────────────────────────┘ +``` + +## toIntervalSecond + +Returns an interval of `n` seconds of data type [IntervalSecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalSecond(n) +``` + +**Arguments** + +- `n` — Number of seconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` seconds. [IntervalSecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalSecond(30) AS interval_to_seconds +SELECT date + interval_to_seconds +``` + +Result: + +```response +┌─plus(date, interval_to_seconds)─┐ +│ 2019-01-01 00:00:30 │ +└─────────────────────────────────┘ +``` + +## toIntervalMillisecond + +Returns an interval of `n` milliseconds of data type [IntervalMillisecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMillisecond(n) +``` + +**Arguments** + +- `n` — Number of milliseconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` milliseconds. [IntervalMilliseconds](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2019-01-01') AS date, + toIntervalMillisecond(30) AS interval_to_milliseconds +SELECT date + interval_to_milliseconds +``` + +Result: + +```response +┌─plus(date, interval_to_milliseconds)─┐ +│ 2019-01-01 00:00:00.030 │ +└──────────────────────────────────────┘ +``` + +## toIntervalMicrosecond + +Returns an interval of `n` microseconds of data type [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMicrosecond(n) +``` + +**Arguments** + +- `n` — Number of microseconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` microseconds. [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2019-01-01') AS date, + toIntervalMicrosecond(30) AS interval_to_microseconds +SELECT date + interval_to_microseconds +``` + +Result: + +```response +┌─plus(date, interval_to_microseconds)─┐ +│ 2019-01-01 00:00:00.000030 │ +└──────────────────────────────────────┘ +``` + +## toIntervalNanosecond + +Returns an interval of `n` nanoseconds of data type [IntervalNanosecond](../data-types/special-data-types/interval.md). 
+
+**Syntax**
+
+``` sql
+toIntervalNanosecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of nanoseconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` nanoseconds. [IntervalNanosecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDateTime('2019-01-01') AS date,
+    toIntervalNanosecond(30) AS interval_to_nanoseconds
+SELECT date + interval_to_nanoseconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_nanoseconds)─┐
+│ 2019-01-01 00:00:00.000000030 │
+└─────────────────────────────────────┘
 ```

 ## parseDateTime

From 7bebc448f30a026e41563553788f463243aa18fd Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:42:22 +0800
Subject: [PATCH 143/363] Fix progress value of system.view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 30539ed6b6a..97065f2b2fb 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,8 @@ void StorageSystemViewRefreshes::fillData(
     res_columns[i++]->insert(refresh.exception_message);

     res_columns[i++]->insert(refresh.refresh_count);
-    res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read);
+//    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast<Float64>(1)));
+    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
     res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
     res_columns[i++]->insert(refresh.progress.read_rows);
     res_columns[i++]->insert(refresh.progress.read_bytes);

From c9bfff3934ba5257dbfd1d43294b27155c6aec30 Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:52:17 +0800
Subject: [PATCH 144/363] Fix progress value of system.view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 97065f2b2fb..3941c4c39c2 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,6 @@ void StorageSystemViewRefreshes::fillData(
     res_columns[i++]->insert(refresh.exception_message);

     res_columns[i++]->insert(refresh.refresh_count);
-//    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast<Float64>(1)));
     res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
     res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
     res_columns[i++]->insert(refresh.progress.read_rows);

From b82c231886f2496c01b288a138663c4d430fc7b2 Mon Sep 17 00:00:00 2001
From: shiyer7474
Date: Thu, 15 Aug 2024 10:37:13 +0000
Subject: [PATCH 145/363] Code review feedback - used removeNullable()

---
 src/Analyzer/ConstantNode.cpp                      | 11 +++--------
 .../03222_datetime64_small_value_const.sql         |  2 +-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp
index 3d0f448da4b..3a99ad08ad8 100644
--- a/src/Analyzer/ConstantNode.cpp
+++ b/src/Analyzer/ConstantNode.cpp
@@ -177,15 +177,10 @@ ASTPtr ConstantNode::toASTImpl(const
ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. */ - if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || - (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) + const auto & constant_value_end_type = removeNullable(constant_value_type); /// if Nullable + if (WhichDataType(constant_value_end_type->getTypeId()).isDateTime64()) { - const DataTypeDateTime64 * date_time_type = nullptr; - if (WhichDataType(constant_value_type->getTypeId()).isNullable()) - date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); - else - date_time_type = typeid_cast(constant_value_type.get()); - + const auto * date_time_type = typeid_cast(constant_value_end_type.get()); DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index 6999ba9662a..af06a622f8d 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -17,7 +17,7 @@ create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); -insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)); insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); From 7d01c3131265d0dfae24fbf6ab91c71073573765 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:01:13 +0200 Subject: [PATCH 146/363] Delete old code of named collections --- src/Core/PostgreSQL/PoolWithFailover.cpp | 2 +- src/Core/PostgreSQL/PoolWithFailover.h | 9 +- src/Dictionaries/HTTPDictionarySource.cpp | 26 +- src/Dictionaries/MongoDBDictionarySource.cpp | 61 ++-- .../PostgreSQLDictionarySource.cpp | 131 ++++++-- .../ExternalDataSourceConfiguration.cpp | 288 ------------------ .../ExternalDataSourceConfiguration.h | 92 ------ src/Storages/NamedCollectionsHelpers.h | 2 +- src/Storages/StorageExternalDistributed.h | 2 - src/TableFunctions/TableFunctionMongoDB.cpp | 1 - src/TableFunctions/TableFunctionRedis.cpp | 1 - 11 files changed, 170 insertions(+), 445 deletions(-) delete mode 100644 src/Storages/ExternalDataSourceConfiguration.cpp delete mode 100644 src/Storages/ExternalDataSourceConfiguration.h diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 5014564dbe0..054fc3b2226 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -23,7 +23,7 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const 
ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 502a9a9b7d7..2237c752367 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" #include #include -#include #include @@ -20,12 +19,12 @@ namespace postgres class PoolWithFailover { - -using RemoteDescription = std::vector>; - public: + using ReplicasConfigurationByPriority = std::map>; + using RemoteDescription = std::vector>; + PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 663c63dd6c6..d6df03b39df 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -8,12 +8,12 @@ #include #include #include -#include #include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" +#include #include "registerDictionaries.h" @@ -223,21 +223,23 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) String endpoint; String format; - auto named_collection = created_from_ddl - ? getURLBasedDataSourceConfiguration(config, settings_config_prefix, global_context) - : std::nullopt; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { - url = named_collection->configuration.url; - endpoint = named_collection->configuration.endpoint; - format = named_collection->configuration.format; + validateNamedCollection( + *named_collection, + /* required_keys */{}, + /* optional_keys */ValidateKeysMultiset{ + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); - credentials.setUsername(named_collection->configuration.user); - credentials.setPassword(named_collection->configuration.password); + url = named_collection->getOrDefault("url", ""); + endpoint = named_collection->getOrDefault("endpoint", ""); + format = named_collection->getOrDefault("format", ""); - header_entries.reserve(named_collection->configuration.headers.size()); - for (const auto & [key, value] : named_collection->configuration.headers) - header_entries.emplace_back(key, value); + credentials.setUsername(named_collection->getAnyOrDefault({"user", "credentials.user"}, "")); + credentials.setPassword(named_collection->getAnyOrDefault({"password", "credentials.password"}, "")); + + header_entries = getHeadersFromNamedCollection(*named_collection); } else { diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index c30a6f90e44..7bacfdab3d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,15 +1,12 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include #include +#include namespace DB { -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"}; - void 
registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { auto create_mongo_db_dictionary = []( @@ -22,35 +19,53 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool created_from_ddl) { const auto config_prefix = root_config_prefix + ".mongodb"; - ExternalDataSourceConfiguration configuration; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key); }; - auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; + + String host, username, password, database, method, options, collection; + UInt16 port; if (named_collection) { - configuration = named_collection->configuration; + validateNamedCollection( + *named_collection, + /* required_keys */{"collection"}, + /* optional_keys */ValidateKeysMultiset{ + "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); + + host = named_collection->getOrDefault("host", ""); + port = static_cast(named_collection->getOrDefault("port", 0)); + username = named_collection->getOrDefault("user", ""); + password = named_collection->getOrDefault("password", ""); + database = named_collection->getAnyOrDefault({"db", "database"}, ""); + method = named_collection->getOrDefault("method", ""); + collection = named_collection->getOrDefault("collection", ""); + options = named_collection->getOrDefault("options", ""); } else { - configuration.host = config.getString(config_prefix + ".host", ""); - configuration.port = config.getUInt(config_prefix + ".port", 0); - configuration.username = config.getString(config_prefix + ".user", ""); - configuration.password = config.getString(config_prefix + ".password", ""); - configuration.database = config.getString(config_prefix + ".db", ""); + host = config.getString(config_prefix + ".host", ""); + port = config.getUInt(config_prefix + ".port", 0); + username = config.getString(config_prefix + ".user", ""); + password = config.getString(config_prefix + ".password", ""); + database = config.getString(config_prefix + ".db", ""); + method = config.getString(config_prefix + ".method", ""); + collection = config.getString(config_prefix + ".collection"); + options = config.getString(config_prefix + ".options", ""); } if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - return std::make_unique(dict_struct, + return std::make_unique( + dict_struct, config.getString(config_prefix + ".uri", ""), - configuration.host, - configuration.port, - configuration.username, - configuration.password, - config.getString(config_prefix + ".method", ""), - configuration.database, - config.getString(config_prefix + ".collection"), - config.getString(config_prefix + ".options", ""), + host, + port, + username, + password, + method, + database, + collection, + options, sample_block); }; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f62a9a009d8..fd026a97cd4 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -13,7 +13,7 @@ #include "readInvalidateQuery.h" #include #include -#include +#include #include #endif @@ -30,7 +30,7 @@ namespace ErrorCodes static const UInt64 max_block_size = 8192; -static const 
std::unordered_set dictionary_allowed_keys = { +static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; @@ -179,6 +179,19 @@ std::string PostgreSQLDictionarySource::toString() const #endif +static void validateConfigKeys( + const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys config_keys; + dict_config.keys(config_prefix, config_keys); + for (const auto & config_key : config_keys) + { + if (dictionary_allowed_keys.contains(config_key) || startsWith(config_key, "replica")) + continue; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); + } +} + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, @@ -191,38 +204,118 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { #if USE_LIBPQXX const auto settings_config_prefix = config_prefix + ".postgresql"; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; - auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); const auto & settings = context->getSettingsRef(); + std::optional dictionary_configuration; + String database, schema, table; + postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; + + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; + if (named_collection) + { + validateNamedCollection>(*named_collection, {}, dictionary_allowed_keys); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = named_collection->getOrDefault("host", ""); + common_configuration.port = named_collection->getOrDefault("port", 0); + common_configuration.username = named_collection->getOrDefault("user", ""); + common_configuration.password = named_collection->getOrDefault("password", ""); + common_configuration.database = named_collection->getAnyOrDefault({"database", "db"}, ""); + common_configuration.schema = named_collection->getOrDefault("schema", ""); + common_configuration.table = named_collection->getOrDefault("table", ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration{ + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + }); + + replicas_by_priority[0].emplace_back(common_configuration); + } + else + { + validateConfigKeys(config, settings_config_prefix); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = config.getString(settings_config_prefix + ".host", ""); + common_configuration.port = config.getUInt(settings_config_prefix + ".port", 0); + common_configuration.username = config.getString(settings_config_prefix + ".user", ""); + common_configuration.password = 
config.getString(settings_config_prefix + ".password", ""); + common_configuration.database = config.getString(fmt::format("{}.database", settings_config_prefix), config.getString(fmt::format("{}.db", settings_config_prefix), "")); + common_configuration.schema = config.getString(fmt::format("{}.schema", settings_config_prefix), ""); + common_configuration.table = config.getString(fmt::format("{}.table", settings_config_prefix), ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration + { + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), + .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), + .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), + .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), + .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) + }); + + + if (config.has(settings_config_prefix + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(settings_config_prefix, config_keys); + + for (const auto & config_key : config_keys) + { + if (config_key.starts_with("replica")) + { + String replica_name = settings_config_prefix + "." + config_key; + StoragePostgreSQL::Configuration replica_configuration{common_configuration}; + + size_t priority = config.getInt(replica_name + ".priority", 0); + replica_configuration.host = config.getString(replica_name + ".host", common_configuration.host); + replica_configuration.port = config.getUInt(replica_name + ".port", common_configuration.port); + replica_configuration.username = config.getString(replica_name + ".user", common_configuration.username); + replica_configuration.password = config.getString(replica_name + ".password", common_configuration.password); + + if (replica_configuration.host.empty() || replica_configuration.port == 0 + || replica_configuration.username.empty() || replica_configuration.password.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); + } + + replicas_by_priority[priority].emplace_back(replica_configuration); + } + } + } + else + { + replicas_by_priority[0].emplace_back(common_configuration); + } + } if (created_from_ddl) { - for (const auto & replicas : configuration.replicas_configurations) - for (const auto & replica : replicas.second) + for (const auto & [_, replicas] : replicas_by_priority) + for (const auto & replica : replicas) context->getRemoteHostFilter().checkHostAndPort(replica.host, toString(replica.port)); } + auto pool = std::make_shared( - configuration.replicas_configurations, + replicas_by_priority, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, settings.postgresql_connection_pool_auto_close_connection, settings.postgresql_connection_attempt_timeout); - PostgreSQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .schema = configuration.schema, - .table = configuration.table, - .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), - .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), - .invalidate_query = 
config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), - .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), - .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) - }; - return std::make_unique(dict_struct, dictionary_configuration, pool, sample_block); + return std::make_unique(dict_struct, dictionary_configuration.value(), pool, sample_block); #else (void)dict_struct; (void)config; diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp deleted file mode 100644 index 41979f8d91c..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ /dev/null @@ -1,288 +0,0 @@ -#include "ExternalDataSourceConfiguration.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "quota_key", "db", - "database", "table", "schema", "replica", - "update_field", "update_lag", "invalidate_query", "query", - "where", "name", "secure", "uri", "collection"}; - - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } - return config_settings; -} - - -String ExternalDataSourceConfiguration::toString() const -{ - WriteBufferFromOwnString configuration_info; - configuration_info << "username: " << username << "\t"; - if (addresses.empty()) - { - configuration_info << "host: " << host << "\t"; - configuration_info << "port: " << port << "\t"; - } - else - { - for (const auto & [replica_host, replica_port] : addresses) - { - configuration_info << "host: " << replica_host << "\t"; - configuration_info << "port: " << replica_port << "\t"; - } - } - return configuration_info.str(); -} - - -void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration & conf) -{ - host = conf.host; - port = conf.port; - username = conf.username; - password = conf.password; - quota_key = conf.quota_key; - database = conf.database; - table = conf.table; - schema = conf.schema; - addresses = conf.addresses; - addresses_expr = conf.addresses_expr; -} - - -static void validateConfigKeys( - const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix, HasConfigKeyFunc has_config_key_func) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(config_prefix, config_keys); - for (const auto & config_key : config_keys) - { - if (!has_config_key_func(config_key)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); - } -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - 
ExternalDataSourceConfiguration configuration; - - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - validateConfigKeys(dict_config, collection_prefix, has_config_key); - auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); - auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); - /// dictionary config settings override collection settings. - config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.host = dict_config.getString(dict_config_prefix + ".host", config.getString(collection_prefix + ".host", "")); - configuration.port = dict_config.getInt(dict_config_prefix + ".port", config.getUInt(collection_prefix + ".port", 0)); - configuration.username = dict_config.getString(dict_config_prefix + ".user", config.getString(collection_prefix + ".user", "")); - configuration.password = dict_config.getString(dict_config_prefix + ".password", config.getString(collection_prefix + ".password", "")); - configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", config.getString(collection_prefix + ".quota_key", "")); - configuration.database = dict_config.getString(dict_config_prefix + ".db", config.getString(dict_config_prefix + ".database", - config.getString(collection_prefix + ".db", config.getString(collection_prefix + ".database", "")))); - configuration.table = dict_config.getString(dict_config_prefix + ".table", config.getString(collection_prefix + ".table", "")); - configuration.schema = dict_config.getString(dict_config_prefix + ".schema", config.getString(collection_prefix + ".schema", "")); - - if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and dictionary parameters are not added"); - } - return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; - } - return std::nullopt; -} - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context) -{ - URLBasedDataSourceConfiguration configuration; - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.url = - dict_config.getString(dict_config_prefix + ".url", config.getString(collection_prefix + ".url", "")); - configuration.endpoint = - dict_config.getString(dict_config_prefix + ".endpoint", config.getString(collection_prefix + ".endpoint", "")); - configuration.format = - dict_config.getString(dict_config_prefix + ".format", config.getString(collection_prefix + ".format", "")); - configuration.compression_method = - 
dict_config.getString(dict_config_prefix + ".compression", config.getString(collection_prefix + ".compression_method", "")); - configuration.structure = - dict_config.getString(dict_config_prefix + ".structure", config.getString(collection_prefix + ".structure", "")); - configuration.user = - dict_config.getString(dict_config_prefix + ".credentials.user", config.getString(collection_prefix + ".credentials.user", "")); - configuration.password = - dict_config.getString(dict_config_prefix + ".credentials.password", config.getString(collection_prefix + ".credentials.password", "")); - - String headers_prefix; - const Poco::Util::AbstractConfiguration *headers_config = nullptr; - if (dict_config.has(dict_config_prefix + ".headers")) - { - headers_prefix = dict_config_prefix + ".headers"; - headers_config = &dict_config; - } - else - { - headers_prefix = collection_prefix + ".headers"; - headers_config = &config; - } - - if (headers_config) - { - Poco::Util::AbstractConfiguration::Keys header_keys; - headers_config->keys(headers_prefix, header_keys); - headers_prefix += "."; - for (const auto & header : header_keys) - { - const auto header_prefix = headers_prefix + header; - configuration.headers.emplace_back( - headers_config->getString(header_prefix + ".name"), - headers_config->getString(header_prefix + ".value")); - } - } - - return URLBasedDataSourceConfig{ .configuration = configuration }; - } - - return std::nullopt; -} - -ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration common_configuration; - - auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); - if (named_collection) - { - common_configuration = named_collection->configuration; - } - else - { - common_configuration.host = dict_config.getString(dict_config_prefix + ".host", ""); - common_configuration.port = dict_config.getUInt(dict_config_prefix + ".port", 0); - common_configuration.username = dict_config.getString(dict_config_prefix + ".user", ""); - common_configuration.password = dict_config.getString(dict_config_prefix + ".password", ""); - common_configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", ""); - common_configuration.database = dict_config.getString(dict_config_prefix + ".db", dict_config.getString(dict_config_prefix + ".database", "")); - common_configuration.table = dict_config.getString(fmt::format("{}.table", dict_config_prefix), ""); - common_configuration.schema = dict_config.getString(fmt::format("{}.schema", dict_config_prefix), ""); - } - - ExternalDataSourcesByPriority configuration - { - .database = common_configuration.database, - .table = common_configuration.table, - .schema = common_configuration.schema, - .replicas_configurations = {} - }; - - if (dict_config.has(dict_config_prefix + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(dict_config_prefix, config_keys); - - for (const auto & config_key : config_keys) - { - if (config_key.starts_with("replica")) - { - ExternalDataSourceConfiguration replica_configuration(common_configuration); - String replica_name = dict_config_prefix + "." 
+ config_key; - validateConfigKeys(dict_config, replica_name, has_config_key); - - size_t priority = dict_config.getInt(replica_name + ".priority", 0); - replica_configuration.host = dict_config.getString(replica_name + ".host", common_configuration.host); - replica_configuration.port = dict_config.getUInt(replica_name + ".port", common_configuration.port); - replica_configuration.username = dict_config.getString(replica_name + ".user", common_configuration.username); - replica_configuration.password = dict_config.getString(replica_name + ".password", common_configuration.password); - replica_configuration.quota_key = dict_config.getString(replica_name + ".quota_key", common_configuration.quota_key); - - if (replica_configuration.host.empty() || replica_configuration.port == 0 - || replica_configuration.username.empty() || replica_configuration.password.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and no other dictionary parameters are added"); - } - - configuration.replicas_configurations[priority].emplace_back(replica_configuration); - } - } - } - else - { - configuration.replicas_configurations[0].emplace_back(common_configuration); - } - - return configuration; -} - - -void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration & conf) -{ - url = conf.url; - format = conf.format; - compression_method = conf.compression_method; - structure = conf.structure; - http_method = conf.http_method; - headers = conf.headers; -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h deleted file mode 100644 index c703c9ce999..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -#define EMPTY_SETTINGS(M, ALIAS) -DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -struct EmptySettings : public BaseSettings {}; - -struct ExternalDataSourceConfiguration -{ - String host; - UInt16 port = 0; - String username = "default"; - String password; - String quota_key; - String database; - String table; - String schema; - - std::vector> addresses; /// Failover replicas. - String addresses_expr; - - String toString() const; - - void set(const ExternalDataSourceConfiguration & conf); -}; - - -using StorageSpecificArgs = std::vector>; - -struct ExternalDataSourceInfo -{ - ExternalDataSourceConfiguration configuration; - SettingsChanges settings_changes; -}; - -using HasConfigKeyFunc = std::function; - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); - - -/// Highest priority is 0, the bigger the number in map, the less the priority. 
-using ExternalDataSourcesConfigurationByPriority = std::map>; - -struct ExternalDataSourcesByPriority -{ - String database; - String table; - String schema; - ExternalDataSourcesConfigurationByPriority replicas_configurations; -}; - -ExternalDataSourcesByPriority -getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key); - -struct URLBasedDataSourceConfiguration -{ - String url; - String endpoint; - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - - String user; - String password; - - HTTPHeaderEntries headers; - String http_method; - - void set(const URLBasedDataSourceConfiguration & conf); -}; - -struct URLBasedDataSourceConfig -{ - URLBasedDataSourceConfiguration configuration; -}; - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context); - -} diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index f444a581eb6..bf2da7235a2 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -133,7 +133,7 @@ void validateNamedCollection( { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Unexpected key {} in named collection. Required keys: {}, optional keys: {}", + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", backQuoteIfNeed(key), fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); } } diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index c4d37c3e5cc..56c7fe86f34 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -8,8 +8,6 @@ namespace DB { -struct ExternalDataSourceConfiguration; - /// Storages MySQL and PostgreSQL use ConnectionPoolWithFailover and support multiple replicas. /// This class unites multiple storages with replicas into multiple shards with replicas. /// A query to external database is passed to one replica on each shard, the result is united. 
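The dictionary sources in this series now validate their configuration against explicit required/optional key sets instead of the old free-form `has_config_key` callbacks. The following is a minimal, self-contained sketch of that validation pattern; the names here (`validateKeys`, the key sets) are illustrative stand-ins, not the actual ClickHouse `validateNamedCollection` API shown above:

```cpp
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

/// Simplified stand-in for validateNamedCollection(): every required key must be
/// present, and every present key must be either required or optional.
/// Requires C++20 for std::set::contains.
void validateKeys(
    const std::set<std::string> & present,
    const std::set<std::string> & required,
    const std::set<std::string> & optional)
{
    for (const auto & key : required)
        if (!present.contains(key))
            throw std::runtime_error("Required key `" + key + "` is missing in named collection");

    for (const auto & key : present)
        if (!required.contains(key) && !optional.contains(key))
            throw std::runtime_error("Unexpected key `" + key + "` in named collection");
}

int main()
{
    /// Mirrors the MongoDB source above: "collection" is required, the rest are optional.
    validateKeys(
        {"host", "port", "collection"},
        {"collection"},
        {"host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"});
    std::cout << "named collection is valid" << std::endl;
}
```

The design benefit over the old callback approach is that a misspelled key fails loudly at dictionary creation instead of being silently ignored.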
diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b2cf1b4675e..94279d1bf6d 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index f87ba6d1c6d..aca751c2840 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB From f6e1eb1643888c2b8bbc179899cbc4bacaee5b78 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:31:48 +0200 Subject: [PATCH 147/363] Fix style check --- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index d6df03b39df..bf19f912723 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -230,7 +230,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) *named_collection, /* required_keys */{}, /* optional_keys */ValidateKeysMultiset{ - "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); url = named_collection->getOrDefault("url", ""); endpoint = named_collection->getOrDefault("endpoint", ""); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd026a97cd4..fd426de126d 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -24,6 +24,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } #if USE_LIBPQXX From 077f10a4ada2a561111207d8e99e22d2c8e48f40 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 18:26:48 +0200 Subject: [PATCH 148/363] Fix build --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd426de126d..8e472f85a6e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -4,6 +4,7 @@ #include #include #include "DictionarySourceFactory.h" +#include #include "registerDictionaries.h" #if USE_LIBPQXX @@ -13,7 +14,6 @@ #include "readInvalidateQuery.h" #include #include -#include #include #endif @@ -27,14 +27,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -#if USE_LIBPQXX - -static const UInt64 max_block_size = 8192; - static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; +#if USE_LIBPQXX + +static const UInt64 max_block_size = 8192; + namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) @@ -178,8 +178,6 @@ std::string PostgreSQLDictionarySource::toString() const return 
"PostgreSQL: " + configuration.db + '.' + configuration.table + (where.empty() ? "" : ", where: " + where); } -#endif - static void validateConfigKeys( const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) { @@ -193,6 +191,8 @@ static void validateConfigKeys( } } +#endif + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, From a49ef43286e31525f8829e3307a9231be0ce417c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 15 Aug 2024 21:33:26 +0000 Subject: [PATCH 149/363] Fix 01119_session_log flakiness --- ...9_session_log.sql => 01119_session_log.sh} | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) rename tests/queries/0_stateless/{01119_session_log.sql => 01119_session_log.sh} (73%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/01119_session_log.sql b/tests/queries/0_stateless/01119_session_log.sh old mode 100644 new mode 100755 similarity index 73% rename from tests/queries/0_stateless/01119_session_log.sql rename to tests/queries/0_stateless/01119_session_log.sh index 55f6228797a..809d300fada --- a/tests/queries/0_stateless/01119_session_log.sql +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -1,5 +1,20 @@ --- Tags: no-fasttest +#!/usr/bin/env bash +# Tags: no-fasttest +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +session_log_query_prefix=" +system flush logs; +select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log +where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') +and interface in ('HTTP', 'TCP', 'TCP_Interserver') +and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) +and event_time >= now() - interval 5 minute" + +$CLICKHOUSE_CLIENT -nm -q " select * from remote('127.0.0.2', system, one, 'default', ''); select * from remote('127.0.0.2', system, one, 'default', 'wrong password'); -- { serverError AUTHENTICATION_FAILED } select * from remote('127.0.0.2', system, one, 'nonexistsnt_user_1119', ''); -- { serverError AUTHENTICATION_FAILED } @@ -16,9 +31,17 @@ select * from url('http://127.0.0.1:8123/?query=select+1&user=+++', LineAsString select * from cluster('test_cluster_interserver_secret', system, one); -system flush logs; -select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log -where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') -and interface in ('HTTP', 'TCP', 'TCP_Interserver') -and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) -and event_time >= now() - interval 5 minute order by type, user, interface; +$session_log_query_prefix and type != 'Logout' order by type, user, interface; +" + +# Wait for logout events. 
+for attempt in {1..10} +do + if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] + then + break + fi + sleep 2 +done + +$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout' order by user, interface" From 253188381759d062967c9bb3b9d0907f30eab61a Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 05:39:50 +0000 Subject: [PATCH 150/363] she sells seashells by seashore the shells that she sells are seashells im sure --- tests/queries/0_stateless/01119_session_log.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01119_session_log.sh b/tests/queries/0_stateless/01119_session_log.sh index 809d300fada..2d17b545276 100755 --- a/tests/queries/0_stateless/01119_session_log.sh +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -35,7 +35,7 @@ $session_log_query_prefix and type != 'Logout' order by type, user, interface; " # Wait for logout events. -for attempt in {1..10} +for _ in {1..10} do if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] then From a85f544205f2e782d4f6c16c0622728475db3571 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 16 Aug 2024 08:47:28 +0000 Subject: [PATCH 151/363] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index bd92465e1aa..c8edbdc5932 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,3 @@ 01624_soft_constraints -02354_vector_search_queries # Check after ConstantNode refactoring 02944_variant_as_common_type From 1b49e2492521c54b0e6240d412af847d3fa21221 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:26:31 +0200 Subject: [PATCH 152/363] Fix clang-tidy --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 8e472f85a6e..b1bab17e2e9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -208,7 +208,6 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) const auto & settings = context->getSettingsRef(); std::optional dictionary_configuration; - String database, schema, table; postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; From 60a6e893a40761eb46655e76cb6a3fe5f177019c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 17:56:12 +0800 Subject: [PATCH 153/363] first commit --- src/Common/examples/CMakeLists.txt | 5 + src/Common/examples/utf8_upper_lower.cpp | 27 ++ src/Functions/LowerUpperImpl.h | 1 - src/Functions/LowerUpperUTF8Impl.h | 283 +++--------------- src/Functions/initcapUTF8.cpp | 3 +- src/Functions/lowerUTF8.cpp | 25 +- src/Functions/upperUTF8.cpp | 24 +- .../00170_lower_upper_utf8.reference | 4 + .../0_stateless/00170_lower_upper_utf8.sql | 11 + .../00233_position_function_family.sql | 3 + .../0_stateless/00761_lower_utf8_bug.sql | 3 + .../0_stateless/01278_random_string_utf8.sql | 3 + .../0_stateless/01431_utf8_ubsan.reference | 4 +- .../queries/0_stateless/01431_utf8_ubsan.sql | 3 + .../0_stateless/01590_countSubstrings.sql | 3 + ...71_lower_upper_utf8_row_overlaps.reference | 4 +- .../02071_lower_upper_utf8_row_overlaps.sql | 3 + ...new_functions_must_be_documented.reference | 2 - .../02514_if_with_lazy_low_cardinality.sql | 3 + .../0_stateless/02807_lower_utf8_msan.sql | 3 + tests/queries/0_stateless/03015_peder1001.sql | 3 + 21 files changed, 159 insertions(+), 261 deletions(-) create mode 100644 src/Common/examples/utf8_upper_lower.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 69580d4ad0e..8383e80d09d 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,3 +92,8 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) + +if (TARGET ch_contrib::icu) + clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) + target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) +endif () diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp new file mode 100644 index 00000000000..826e1763105 --- /dev/null +++ b/src/Common/examples/utf8_upper_lower.cpp @@ -0,0 +1,27 @@ +#include +#include + +std::string utf8_to_lower(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toLower(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +std::string utf8_to_upper(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toUpper(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +int main() +{ + std::string input = "ır"; + std::cout << "upper:" << utf8_to_upper(input) << std::endl; + return 0; +} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index d463ef96e16..a52703d10c8 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,7 +1,6 @@ #pragma once #include - namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index eedabca5b22..5da085f48e5 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,15 +1,14 @@ #pragma once + +#include "config.h" + +#if USE_ICU + #include #include -#include -#include +#include +#include #include -#include - -#ifdef __SSE2__ -#include -#endif - namespace DB { @@ -19,71 +18,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -/// xor or do nothing -template -UInt8 xor_or_identity(const 
UInt8 c, const int mask) -{ - return c ^ mask; -} - -template <> -inline UInt8 xor_or_identity(const UInt8 c, const int) -{ - return c; -} - -/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array -template -inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) -{ - if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } -} - - -/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. - * Note: It is assumed that after the character is converted to another case, - * the length of its multibyte sequence in UTF-8 does not change. - * Otherwise, the behavior is undefined. - */ -template +template struct LowerUpperUTF8Impl { static void vector( @@ -103,180 +38,46 @@ struct LowerUpperUTF8Impl return; } - res_data.resize_exact(data.size()); - res_offsets.assign(offsets); - array(data.data(), data.data() + data.size(), offsets, res_data.data()); + res_data.resize(data.size()); + res_offsets.resize_exact(offsets.size()); + + String output; + size_t curr_offset = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); + size_t size = offsets[i] - offsets[i - 1]; + + icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); + if constexpr (upper) + input.toUpper(); + else + input.toLower(); + + output.clear(); + input.toUTF8String(output); + + /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first + /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this + /// case, the behavior is also reasonable. + const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); + size_t valid_size = res_end ? res_end - output.data() + 1 : 0; + + res_data.resize(curr_offset + valid_size + 1); + memcpy(&res_data[curr_offset], output.data(), valid_size); + res_data[curr_offset + valid_size] = 0; + + curr_offset += valid_size + 1; + res_offsets[i] = curr_offset; + } } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } - - /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. - * `src` and `dst` are incremented by corresponding sequence lengths. 
*/ - static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) - { - if (src[0] <= ascii_upper_bound) - { - if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) - *dst++ = *src++ ^ flip_case_mask; - else - *dst++ = *src++; - } - else if (src + 1 < src_end - && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) - { - cyrillic_to_case(src, dst); - } - else if (src + 1 < src_end && src[0] == 0xC2u) - { - /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF - *dst++ = *src++; - *dst++ = *src++; - } - else if (src + 2 < src_end && src[0] == 0xE2u) - { - /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } - else - { - size_t src_sequence_length = UTF8::seqLength(*src); - /// In case partial buffer was passed (due to SSE optimization) - /// we cannot convert it with current src_end, but we may have more - /// bytes to convert and eventually got correct symbol. - if (partial && src_sequence_length > static_cast(src_end - src)) - return false; - - auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); - if (src_code_point) - { - int dst_code_point = to_case(*src_code_point); - if (dst_code_point > 0) - { - size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); - assert(dst_sequence_length <= 4); - - /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. - /// As an example, this happens for ß and ẞ. - if (dst_sequence_length == src_sequence_length) - { - src += dst_sequence_length; - dst += dst_sequence_length; - return true; - } - } - } - - *dst = *src; - ++dst; - ++src; - } - - return true; - } - -private: - static constexpr auto ascii_upper_bound = '\x7f'; - static constexpr auto flip_case_mask = 'A' ^ 'a'; - - static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) - { - const auto * offset_it = offsets.begin(); - const UInt8 * begin = src; - -#ifdef __SSE2__ - static constexpr auto bytes_sse = sizeof(__m128i); - - /// If we are before this position, we can still read at least bytes_sse. 
- const auto * src_end_sse = src_end - bytes_sse + 1; - - /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) - const auto v_zero = _mm_setzero_si128(); - const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); - const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); - const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); - - while (src < src_end_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(src)); - - /// check for ASCII - const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); - const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); - - /// ASCII - if (mask_is_not_ascii == 0) - { - const auto is_not_case - = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); - const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); - - /// everything in correct case ASCII - if (mask_is_not_case == 0) - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); - else - { - /// ASCII in mixed case - /// keep `flip_case_mask` only where necessary, zero out elsewhere - const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); - - /// flip case by applying calculated mask - const auto cased_chars = _mm_xor_si128(chars, xor_mask); - - /// store result back to destination - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); - } - - src += bytes_sse; - dst += bytes_sse; - } - else - { - /// UTF-8 - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_from_begin >= *offset_it) - ++offset_it; - - /// Do not allow one row influence another (since row may have invalid sequence, and break the next) - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - const UInt8 * expected_end = std::min(src + bytes_sse, row_end); - - while (src < expected_end) - { - if (!toCase(src, expected_end, dst, /* partial= */ true)) - { - /// Fallback to handling byte by byte. 
- src_end_sse = src; - break; - } - } - } - } - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_it != offsets.end() && offset_from_begin >= *offset_it) - ++offset_it; -#endif - - /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) - while (src < src_end) - { - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - - while (src < row_end) - toCase(src, row_end, dst, /* partial= */ false); - ++offset_it; - } - } }; } + +#endif diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 282d846094e..004586dce26 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -1,9 +1,8 @@ #include #include -#include #include #include - +#include namespace DB { diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index 7adb0069121..e2f7cb84730 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -1,9 +1,10 @@ -#include +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -15,13 +16,25 @@ struct NameLowerUTF8 static constexpr auto name = "lowerUTF8"; }; -using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; +using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>; } REGISTER_FUNCTION(LowerUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "lowerUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp index 659e67f0ef3..ef26430331f 100644 --- a/src/Functions/upperUTF8.cpp +++ b/src/Functions/upperUTF8.cpp @@ -1,8 +1,10 @@ +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -14,13 +16,25 @@ struct NameUpperUTF8 static constexpr auto name = "upperUTF8"; }; -using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>; +using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>; } REGISTER_FUNCTION(UpperUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text.
If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "upperUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index f202cb75513..3c644f22b9b 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -22,3 +22,7 @@ 1 1 1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 4caba2033ff..85b6c5c6095 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; @@ -27,3 +30,11 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n; select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; + +-- Turkish language +select upperUTF8('ır') = 'IR'; +select lowerUTF8('ır') = 'ır'; + +-- German language +select upper('öäüß') = 'öäüß'; +select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql index dd7394bc39a..d6668cb7ba4 100644 --- a/tests/queries/0_stateless/00233_position_function_family.sql +++ b/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SET send_logs_level = 'fatal'; select 1 = position('', ''); diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql index de20b894331..a0ab55edc15 100644 --- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql +++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql @@ -1 +1,4 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0'); diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index da2dc48c3e1..290d6a0c759 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT lengthUTF8(randomStringUTF8(100)); 
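-- lengthUTF8 counts code points rather than bytes, so the query above is expected to return exactly 100 regardless of the encoded byte length.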
SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index c98c950d535..dc785e57851 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -FF -FF +EFBFBD +EFBFBD diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index d6a299225b1..3a28e023805 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index b38cbb7d188..5ec4f412d7f 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index a3bac432482..deabef61a88 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index 8ca0a3f5f75..d175e0659d0 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c39f1fb1ce9..0980e25b70f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,7 +416,6 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower -lowerUTF8 makeDate makeDate32 makeDateTime @@ -897,7 +896,6 @@ tupleToNameValuePairs unbin unhex upper -upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index 80e3c0a9ece..b169cfd0ab9 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + create table if not exists t 
(`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql index e9eb18bf615..95f224577f7 100644 --- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql +++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1; SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2); diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql index 810503207f2..df8e4db1536 100644 --- a/tests/queries/0_stateless/03015_peder1001.sql +++ b/tests/queries/0_stateless/03015_peder1001.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + DROP TABLE IF EXISTS test_data; CREATE TABLE test_data From 4600b270dafec20b276ab83eb557270c24cb4169 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 17:58:54 +0800 Subject: [PATCH 154/363] remove icu contrib --- .gitmodules | 3 --- contrib/icu | 1 - 2 files changed, 4 deletions(-) delete mode 160000 contrib/icu diff --git a/.gitmodules b/.gitmodules index 7fdfb1103c5..164da311930 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,9 +106,6 @@ [submodule "contrib/icudata"] path = contrib/icudata url = https://github.com/ClickHouse/icudata -[submodule "contrib/icu"] - path = contrib/icu - url = https://github.com/unicode-org/icu [submodule "contrib/flatbuffers"] path = contrib/flatbuffers url = https://github.com/ClickHouse/flatbuffers diff --git a/contrib/icu b/contrib/icu deleted file mode 160000 index 7750081bda4..00000000000 --- a/contrib/icu +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 From 3ee741bd5e33d16b2f5711a8f2b06fca1a64b7bc Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 18:04:15 +0800 Subject: [PATCH 155/363] add submodule contrib/icu from clickhouse --- .gitmodules | 4 ++++ contrib/icu | 1 + 2 files changed, 5 insertions(+) create mode 160000 contrib/icu diff --git a/.gitmodules b/.gitmodules index 164da311930..a8cc6a07caf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,7 @@ [submodule "contrib/numactl"] path = contrib/numactl url = https://github.com/ClickHouse/numactl.git +[submodule "contrib/icu"] + path = contrib/icu + url = https://github.com/ClickHouse/icu + branch = ClickHouse/release-75-1 diff --git a/contrib/icu b/contrib/icu new file mode 160000 index 00000000000..4216173eeeb --- /dev/null +++ b/contrib/icu @@ -0,0 +1 @@ +Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273 From 5ff4d990e189dfee42eb57f567a5ff6313cfa8d8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 11:11:11 +0200 Subject: [PATCH 156/363] CI: Auto Releases in prod --- .github/workflows/auto_releases.yml | 76 ++++++++++------------ .github/workflows/create_release.yml | 2 + tests/ci/auto_release.py | 9 ++++ tests/ci/ci_utils.py | 24 ++++++--- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml index c159907187c..28483ea136f 100644 ---
a/.github/workflows/auto_releases.yml +++ b/.github/workflows/auto_releases.yml @@ -19,13 +19,11 @@ on: jobs: AutoReleaseInfo: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, release-maker] outputs: data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }} dry_run: ${{ steps.info.outputs.DRY_RUN }} steps: - - name: Debug Info - uses: ./.github/actions/debug - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' @@ -36,6 +34,10 @@ jobs: echo "DRY_RUN=true" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 + with: + fetch-depth: 0 # full history needed + - name: Debug Info + uses: ./.github/actions/debug - name: Prepare Info id: info run: | @@ -46,12 +48,7 @@ jobs: echo "::endgroup::" { echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" - { - echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_OUTPUT" echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" @@ -62,48 +59,29 @@ jobs: - name: Clean up uses: ./.github/actions/clean - Release_0: + Releases: needs: AutoReleaseInfo - name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }} - if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }} + strategy: + matrix: + release_params: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases }} + max-parallel: 1 + name: Release ${{ matrix.release_params.release_branch }} uses: ./.github/workflows/create_release.yml with: - ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} + ref: ${{ matrix.release_params.commit_sha }} type: patch - dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }} -# -# Release_1: -# needs: [AutoReleaseInfo, Release_0] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }} -# uses: ./.github/workflows/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_2: -# needs: [AutoReleaseInfo, Release_1] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_3: -# needs: [AutoReleaseInfo, Release_2] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} + dry-run: ${{ fromJson(needs.AutoReleaseInfo.outputs.dry_run) }} + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} -# - name: Post Slack Message -# if: ${{ !cancelled() }} -# run: | -# cd "$GITHUB_WORKSPACE/tests/ci" -# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} + PostSlackMessage: + needs: [AutoReleaseInfo] + runs-on: [self-hosted, release-maker] + if: ${{ !cancelled() }} + steps: + - name: 
Check out repository code + uses: ClickHouse/checkout@v1 + - name: Post + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 1553d689227..1fb6cb60e96 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -47,6 +47,8 @@ concurrency: required: false default: false type: boolean + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: jobs: CreateRelease: diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 3cc88634004..58cfc833afe 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -1,4 +1,5 @@ import argparse +import copy import dataclasses import json import os @@ -46,6 +47,7 @@ def parse_args(): MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5 AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json" +AUTORELEASE_MATRIX_PARAMS = "/tmp/autorelease_params.json" @dataclasses.dataclass @@ -74,6 +76,12 @@ class AutoReleaseInfo: with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + # Dump a file for the GH Actions matrix: same as the file above, but with not-ready release branches dropped + params = copy.deepcopy(self) + params.releases = [release for release in params.releases if release.ready] + with open(AUTORELEASE_MATRIX_PARAMS, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(params), indent=2), file=f) + @staticmethod def from_file() -> "AutoReleaseInfo": with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -136,6 +144,7 @@ def _prepare(token): commit_ci_status = CI.GH.get_commit_status_by_name( token=token, commit_sha=commit, + # handle old name for old releases status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), ) commit_sha = commit diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index b8778e0cc50..97ab10f1b58 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -102,21 +102,29 @@ class GH: assert len(commit_sha) == 40 assert Utils.is_hex(commit_sha) assert not Utils.is_hex(token) - url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" + + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses" headers = { "Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get(url, headers=headers, timeout=5) if isinstance(status_name, str): status_name = (status_name,) - if response.status_code == 200: - assert "next" not in response.links, "Response truncated" - statuses = response.json() - for status in statuses: - if status["context"] in status_name: - return status["state"] # type: ignore + + while url: + response = requests.get(url, headers=headers, timeout=5) + if response.status_code == 200: + statuses = response.json() + for status in statuses: + if status["context"] in status_name: + return status["state"] + + # Check if there is a next page + url = response.links.get("next", {}).get("url") + else: + break + return "" @staticmethod From f17655f13fcadb9babbc859e45e9f38cb32ad9e3 Mon Sep 17 00:00:00 2001 From: Dani Pozo Date: Thu, 1 Aug 2024 11:01:27 +0200 Subject: [PATCH 157/363] Load filesystem cache metadata asynchronously --- src/Common/StatusFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 35 +++--- .../Cached/CachedObjectStorage.cpp | 5 +- src/Interpreters/Cache/FileCache.cpp | 71
+++++++++--- src/Interpreters/Cache/FileCache.h | 10 ++ src/Interpreters/Cache/FileCacheSettings.cpp | 3 + src/Interpreters/Cache/FileCacheSettings.h | 1 + .../InterpreterDescribeCacheQuery.cpp | 2 + src/Interpreters/TemporaryDataOnDisk.cpp | 4 +- src/Interpreters/tests/gtest_filecache.cpp | 11 ++ .../System/StorageSystemFilesystemCache.cpp | 3 + .../StorageSystemFilesystemCacheSettings.cpp | 2 + tests/config/config.d/storage_conf.xml | 2 + tests/config/config.d/storage_conf_02944.xml | 1 + .../integration/test_filesystem_cache/test.py | 106 +++++++++++++++--- .../02344_describe_cache.reference | 2 +- .../0_stateless/02344_describe_cache.sh | 2 +- ...8_filesystem_cache_as_collection.reference | 4 +- .../02908_filesystem_cache_as_collection.sql | 4 +- ...ge_cache_setting_without_restart.reference | 14 +-- ...lly_change_filesystem_cache_size.reference | 10 +- 21 files changed, 227 insertions(+), 67 deletions(-) diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 80464f38082..0bbb7ff411d 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) std::string contents; { ReadBufferFromFile in(path, 1024); - LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ {}); + LimitReadBuffer limit_in(in, 1024, /* throw_exception */ false, /* exact_limit */ {}); readStringUntilEOF(contents, limit_in); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index bb9761a3905..c96f5f0c931 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -80,20 +80,27 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c if (with_file_cache) { - auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); - buf = std::make_unique( - object_path, - cache_key, - settings.remote_fs_cache, - FileCache::getCommonUser(), - [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, - settings, - query_id, - object.bytes_size, - /* allow_seeks */false, - /* use_external_buffer */true, - /* read_until_position */std::nullopt, - cache_log); + if (settings.remote_fs_cache->isInitialized()) + { + auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); + buf = std::make_unique( + object_path, + cache_key, + settings.remote_fs_cache, + FileCache::getCommonUser(), + [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, + settings, + query_id, + object.bytes_size, + /* allow_seeks */false, + /* use_external_buffer */true, + /* read_until_position */std::nullopt, + cache_log); + } + else + { + settings.remote_fs_cache->throwInitExceptionIfNeeded(); + } } /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index fb817005399..ab0d357119c 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -99,7 +99,7 @@ std::unique_ptr CachedObjectStorage::writeObject( /// N /// Need to remove even if cache_on_write == false. 
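/// Otherwise a stale cache entry for the old object at this path could still be served after the overwrite.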
removeCacheIfExists(object.remote_path); - if (cache_on_write) + if (cache_on_write && cache->isInitialized()) { auto key = getCacheKey(object.remote_path); return std::make_unique( @@ -122,7 +122,8 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c return; /// Add try catch? - cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); + if (cache->isInitialized()) + cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); } void CachedObjectStorage::removeObject(const StoredObject & object) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index e3925163362..4c35c0f7f4c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -11,11 +11,15 @@ #include #include #include +#include +#include #include #include #include +#include #include +#include namespace fs = std::filesystem; @@ -88,6 +92,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? settings.bypass_cache_threshold : 0) , boundary_alignment(settings.boundary_alignment) , load_metadata_threads(settings.load_metadata_threads) + , load_metadata_asynchronously(settings.load_metadata_asynchronously) , write_cache_per_user_directory(settings.write_cache_per_user_id_directory) , keep_current_size_to_max_ratio(1 - settings.keep_free_space_size_ratio) , keep_current_elements_to_max_ratio(1 - settings.keep_free_space_elements_ratio) @@ -136,7 +141,17 @@ const FileCache::UserInfo & FileCache::getInternalUser() bool FileCache::isInitialized() const { - return is_initialized.load(std::memory_order_seq_cst); + return is_initialized; +} + +void FileCache::throwInitExceptionIfNeeded() +{ + if (load_metadata_asynchronously) + return; + + std::lock_guard lock(init_mutex); + if (init_exception) + std::rethrow_exception(init_exception); } const String & FileCache::getBasePath() const @@ -170,6 +185,35 @@ void FileCache::assertInitialized() const } void FileCache::initialize() +{ + // Prevent initialize() from running twice. This may be caused by two cache disks being created with the same path (see integration/test_filesystem_cache). + callOnce(initialize_called, [&] { + bool need_to_load_metadata = fs::exists(getBasePath()); + try + { + if (!need_to_load_metadata) + fs::create_directories(getBasePath()); + status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + } + catch (...) + { + init_exception = std::current_exception(); + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } + + if (load_metadata_asynchronously) + { + load_metadata_main_thread = ThreadFromGlobalPool([this, need_to_load_metadata] { initializeImpl(need_to_load_metadata); }); + } + else + { + initializeImpl(need_to_load_metadata); + } + }); +} + +void FileCache::initializeImpl(bool load_metadata) { std::lock_guard lock(init_mutex); @@ -178,16 +222,10 @@ void FileCache::initialize() try { - if (fs::exists(getBasePath())) - { + if (load_metadata) loadMetadata(); - } - else - { - fs::create_directories(getBasePath()); - } - status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + metadata.startup(); } catch (...) 
{ @@ -196,8 +234,6 @@ void FileCache::initialize() throw; } - metadata.startup(); - if (keep_current_size_to_max_ratio != 1 || keep_current_elements_to_max_ratio != 1) { keep_up_free_space_ratio_task = Context::getGlobalContextInstance()->getSchedulePool().createTask(log->name(), [this] { freeSpaceRatioKeepingThreadFunc(); }); @@ -205,6 +241,7 @@ void FileCache::initialize() } is_initialized = true; + LOG_TEST(log, "Initialized cache from {}", metadata.getBaseDirectory()); } CachePriorityGuard::Lock FileCache::lockCache() const @@ -1185,7 +1222,6 @@ void FileCache::loadMetadataImpl() std::vector loading_threads; std::exception_ptr first_exception; std::mutex set_exception_mutex; - std::atomic stop_loading = false; LOG_INFO(log, "Loading filesystem cache with {} threads from {}", load_metadata_threads, metadata.getBaseDirectory()); @@ -1195,7 +1231,7 @@ void FileCache::loadMetadataImpl() { loading_threads.emplace_back([&] { - while (!stop_loading) + while (!stop_loading_metadata) { try { @@ -1212,7 +1248,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; return; } } @@ -1225,7 +1261,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; break; } } @@ -1412,6 +1448,11 @@ FileCache::~FileCache() void FileCache::deactivateBackgroundOperations() { shutdown.store(true); + + stop_loading_metadata = true; + if (load_metadata_main_thread.joinable()) + load_metadata_main_thread.join(); + metadata.shutdown(); if (keep_up_free_space_ratio_task) keep_up_free_space_ratio_task->deactivate(); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07be802a940..579472eb824 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -82,6 +83,9 @@ public: bool isInitialized() const; + /// Throws if `!load_metadata_asynchronously` and there is an exception in `init_exception` + void throwInitExceptionIfNeeded(); + const String & getBasePath() const; static Key createKeyForPath(const String & path); @@ -198,6 +202,9 @@ private: const size_t bypass_cache_threshold; const size_t boundary_alignment; size_t load_metadata_threads; + const bool load_metadata_asynchronously; + std::atomic stop_loading_metadata = false; + ThreadFromGlobalPool load_metadata_main_thread; const bool write_cache_per_user_directory; BackgroundSchedulePool::TaskHolder keep_up_free_space_ratio_task; @@ -209,6 +216,7 @@ private: std::exception_ptr init_exception; std::atomic is_initialized = false; + OnceFlag initialize_called; mutable std::mutex init_mutex; std::unique_ptr status_file; std::atomic shutdown = false; @@ -246,6 +254,8 @@ private: */ FileCacheQueryLimitPtr query_limit; + void initializeImpl(bool load_metadata); + void assertInitialized() const; void assertCacheCorrectness(); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index c68ff3183c6..e162d6b7551 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -65,6 +65,9 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin if (has("load_metadata_threads")) load_metadata_threads = get_uint("load_metadata_threads"); + if (has("load_metadata_asynchronously")) + load_metadata_asynchronously = 
get_uint("load_metadata_asynchronously"); + if (boundary_alignment > max_file_segment_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`"); diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 93ded202947..72a2b6c3369 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -32,6 +32,7 @@ struct FileCacheSettings size_t background_download_queue_size_limit = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT; size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS; + bool load_metadata_asynchronously = false; bool write_cache_per_user_id_directory = false; diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index c7e863bf260..c7464dc6b77 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -20,6 +20,7 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "max_size"}, ColumnWithTypeAndName{std::make_shared(), "max_elements"}, ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared(), "is_initialized"}, ColumnWithTypeAndName{std::make_shared(), "boundary_alignment"}, ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, ColumnWithTypeAndName{std::make_shared>(), "cache_hits_threshold"}, @@ -50,6 +51,7 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(settings.boundary_alignment); res_columns[i++]->insert(settings.cache_on_write_operations); res_columns[i++]->insert(settings.cache_hits_threshold); diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 7f0fb8cd6ca..3259d7b67d6 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -65,7 +65,7 @@ TemporaryDataOnDisk::TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, Cu std::unique_ptr TemporaryDataOnDisk::createRawStream(size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); return std::make_unique(std::move(holder)); @@ -81,7 +81,7 @@ std::unique_ptr TemporaryDataOnDisk::createRawStream(si TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 36acc319f4e..5e2d3ee8219 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ #include #include +using namespace std::chrono_literals; namespace fs = std::filesystem; using namespace DB; @@ -358,9 +360,11 @@ TEST_F(FileCacheTest, LRUPolicy) settings.max_size = 30; settings.max_elements = 5; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; const size_t file_size = INT_MAX; // the value doesn't really matter because boundary_alignment == 1. 
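// (With boundary_alignment == 1 the requested range is never expanded toward the file boundaries, so the nominal size should never be consulted.)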
+ const auto user = FileCache::getCommonUser(); { std::cerr << "Step 1\n"; @@ -815,6 +819,7 @@ TEST_F(FileCacheTest, writeBuffer) settings.max_elements = 5; settings.max_file_segment_size = 5; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; FileCache cache("6", settings); cache.initialize(); @@ -946,6 +951,7 @@ TEST_F(FileCacheTest, temporaryData) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("7", settings); file_cache.initialize(); @@ -1073,6 +1079,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) settings.max_size = 30; settings.max_elements = 10; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; ReadSettings read_settings; read_settings.enable_filesystem_cache = true; @@ -1092,6 +1099,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) auto cache = std::make_shared("8", settings); cache->initialize(); + auto key = cache->createKeyForPath(file_path); const auto user = FileCache::getCommonUser(); @@ -1132,6 +1140,7 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("cache", settings); file_cache.initialize(); @@ -1195,6 +1204,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings.max_size = 40; settings.max_elements = 6; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; settings.cache_policy = "SLRU"; settings.slru_size_ratio = 0.5; @@ -1307,6 +1317,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings2.boundary_alignment = 1; settings2.cache_policy = "SLRU"; settings2.slru_size_ratio = 0.5; + settings2.load_metadata_asynchronously = false; auto cache = std::make_shared("slru_2", settings2); cache->initialize(); diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index cfb388bc232..0e972d8411b 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -47,6 +47,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_name, cache_data] : caches) { const auto & cache = cache_data->cache; + if (!cache->isInitialized()) + continue; + cache->iterate([&](const FileSegment::Info & file_segment) { size_t i = 0; diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp index 8915032baf7..c6bba6b8598 100644 --- a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp @@ -21,6 +21,7 @@ ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription() {"path", std::make_shared(), "Cache directory"}, {"max_size", std::make_shared(), "Cache size limit by the number of bytes"}, {"max_elements", std::make_shared(), "Cache size limit by the number of elements"}, + {"is_initialized", std::make_shared(), "Whether the cache is initialized and ready to be used"}, {"current_size", std::make_shared(), "Current cache size by the number of bytes"}, {"current_elements", std::make_shared(), "Current cache size by the number of elements"}, {"max_file_segment_size", std::make_shared(), "Maximum allowed file segment size"}, @@ -56,6 +57,7 @@ void
StorageSystemFilesystemCacheSettings::fillData( res_columns[i++]->insert(settings.base_path); res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(cache->getUsedCacheSize()); res_columns[i++]->insert(cache->getFileSegmentsNum()); res_columns[i++]->insert(settings.max_file_segment_size); diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index e106e3a0e6b..091071f0637 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -27,6 +27,7 @@ 0.3 0.15 0.15 + 0 cache @@ -37,6 +38,7 @@ 100 0 0 + 0 diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml index 5f45640a923..08d78900229 100644 --- a/tests/config/config.d/storage_conf_02944.xml +++ b/tests/config/config.d/storage_conf_02944.xml @@ -19,6 +19,7 @@ 10 100 0 + 0 diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 17a8dd8b6e1..aee8bd25c2e 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -1,6 +1,7 @@ import logging import time import os +import random import pytest from helpers.cluster import ClickHouseCluster @@ -30,14 +31,6 @@ def cluster(): "config.d/storage_conf_2.xml", ], ) - cluster.add_instance( - "node_no_filesystem_caches_path", - main_configs=[ - "config.d/storage_conf.xml", - "config.d/remove_filesystem_caches_path.xml", - ], - stay_alive=True, - ) cluster.add_instance( "node_force_read_through_cache_on_merge", main_configs=[ @@ -59,6 +52,51 @@ def cluster(): cluster.shutdown() +@pytest.fixture(scope="function") +def non_shared_cluster(): + """ + For tests that cannot run in parallel against the same node/cluster (see test_custom_cached_disk, which relies on + changing server settings at runtime) + """ + try: + # Randomize the cluster name + cluster = ClickHouseCluster(f"{__file__}_non_shared_{random.randint(0, 10**7)}") + cluster.add_instance( + "node_no_filesystem_caches_path", + main_configs=[ + "config.d/storage_conf.xml", + "config.d/remove_filesystem_caches_path.xml", + ], + stay_alive=True, + ) + + logging.info("Starting test-exclusive cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def wait_for_cache_initialized(node, cache_path, max_attempts=50): + initialized = False + attempts = 0 + while not initialized: + query_result = node.query( + "SELECT path FROM system.filesystem_cache_settings WHERE is_initialized" + ) + initialized = cache_path in query_result + + if initialized: + break + + time.sleep(0.1) + attempts += 1 + if attempts >= max_attempts: + raise RuntimeError("Stopped waiting for cache to be initialized") + + @pytest.mark.parametrize("node_name", ["node"]) def test_parallel_cache_loading_on_startup(cluster, node_name): node = cluster.instances[node_name] @@ -71,14 +109,21 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - path = 'paralel_loading_test', + name = 'parallel_loading_test', + path = 'parallel_loading_test', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', max_size = '1Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "parallel_loading_test") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test
SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; SELECT * FROM test FORMAT Null; @@ -103,6 +148,7 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ) node.restart_clickhouse() + wait_for_cache_initialized(node, "parallel_loading_test") # < because of additional files loaded into cache on server startup. assert cache_count <= int(node.query("SELECT count() FROM system.filesystem_cache")) @@ -131,7 +177,7 @@ def test_caches_with_the_same_configuration(cluster, node_name): node = cluster.instances[node_name] cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["test", "test2"]: node.query( f""" @@ -142,14 +188,20 @@ def test_caches_with_the_same_configuration(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - name = {table}, + name = '{table}', path = '{cache_path}', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', cache_on_write_operations=1, max_size = '1Mi'); + """ + ) + wait_for_cache_initialized(node, cache_path) + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -195,9 +247,8 @@ def test_caches_with_the_same_configuration(cluster, node_name): @pytest.mark.parametrize("node_name", ["node_caches_with_same_path"]) def test_caches_with_the_same_configuration_2(cluster, node_name): node = cluster.instances[node_name] - cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["cache1", "cache2"]: node.query( f""" @@ -207,7 +258,13 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): Engine=MergeTree() ORDER BY value SETTINGS disk = '{table}'; + """ + ) + wait_for_cache_initialized(node, "cache1") + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -227,8 +284,8 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ) -def test_custom_cached_disk(cluster): - node = cluster.instances["node_no_filesystem_caches_path"] +def test_custom_cached_disk(non_shared_cluster): + node = non_shared_cluster.instances["node_no_filesystem_caches_path"] assert "Cannot create cached custom disk without" in node.query_and_get_error( f""" @@ -377,6 +434,7 @@ def test_force_filesystem_cache_on_merges(cluster): ORDER BY value SETTINGS disk = disk( type = cache, + name = 'force_cache_on_merges', path = 'force_cache_on_merges', disk = 'hdd_blob', max_file_segment_size = '1Ki', @@ -385,7 +443,13 @@ def test_force_filesystem_cache_on_merges(cluster): max_size = '10Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "force_cache_on_merges") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; @@ -441,7 +505,13 @@ SETTINGS disk = disk(type = cache, path = "test_system_sync_filesystem_cache", delayed_cleanup_interval_ms = 10000000, disk = hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_system_sync_filesystem_cache") + + node.query( + """ INSERT INTO test SELECT 1, 'test'; """ ) @@ -525,7 +595,13 @@ SETTINGS disk = disk(type = cache, keep_free_space_elements_ratio = {elements_ratio}, disk = 
hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_keep_up_size_ratio") + + node.query( + """ INSERT INTO test SELECT randomString(200); """ ) diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 6895606eb2b..13429b14866 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 +102400 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02344_describe_cache.sh b/tests/queries/0_stateless/02344_describe_cache.sh index d91661db9bc..c5373b4d7e3 100755 --- a/tests/queries/0_stateless/02344_describe_cache.sh +++ b/tests/queries/0_stateless/02344_describe_cache.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk'); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk', load_metadata_asynchronously = 0); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference index d4191af1594..41a60204eab 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference @@ -1,2 +1,2 @@ -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql index c7216833bc9..127baa8304e 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql @@ -3,8 +3,8 @@ CREATE NAMED COLLECTION IF NOT EXISTS cache_collection_sql AS path = 'collection_sql', max_size = '1Mi'; DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME'; CREATE TABLE test2 (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', cache_name='cache_collection'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', 
cache_name='cache_collection', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME_2'; diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference index 17a25d82824..0f64d0393b2 100644 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference @@ -1,7 +1,7 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 298cc908178..c6bbcdc20c2 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +10 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 2 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 From 
dfd17cc2d71555de9c42ad6085c35bb3f1372dd1 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 13:23:57 +0000 Subject: [PATCH 158/363] Check for invalid regexp in JSON SKIP REGEXP section --- src/DataTypes/DataTypeObject.cpp | 11 +++++++++++ .../0_stateless/03227_json_invalid_regexp.reference | 0 .../queries/0_stateless/03227_json_invalid_regexp.sql | 4 ++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.reference create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.sql diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index d6395155397..11fffd8769b 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -49,6 +49,17 @@ DataTypeObject::DataTypeObject( , max_dynamic_paths(max_dynamic_paths_) , max_dynamic_types(max_dynamic_types_) { + /// Check if regular expressions are valid. + for (const auto & regexp_str : path_regexps_to_skip) + { + re2::RE2::Options options; + /// Don't log errors to stderr. + options.set_log_errors(false); + auto regexp = re2::RE2(regexp_str, options); + if (!regexp.error().empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid regexp '{}': {}", regexp_str, regexp.error()); + } + for (const auto & [typed_path, type] : typed_paths) { for (const auto & path_to_skip : paths_to_skip) diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.reference b/tests/queries/0_stateless/03227_json_invalid_regexp.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.sql b/tests/queries/0_stateless/03227_json_invalid_regexp.sql new file mode 100644 index 00000000000..734dea1aac6 --- /dev/null +++ b/tests/queries/0_stateless/03227_json_invalid_regexp.sql @@ -0,0 +1,4 @@ +set allow_experimental_json_type = 1; +create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError BAD_ARGUMENTS} +create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError BAD_ARGUMENTS}; + From 6bd65dbfa5c5d450e355dc64c110db65d2f56cbb Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Fri, 16 Aug 2024 15:07:53 +0000 Subject: [PATCH 159/363] Use HTTP/1.1 for external HTTP authentication --- src/Access/HTTPAuthClient.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Access/HTTPAuthClient.h b/src/Access/HTTPAuthClient.h index a8b56cf05a7..a1b97a729a3 100644 --- a/src/Access/HTTPAuthClient.h +++ b/src/Access/HTTPAuthClient.h @@ -82,7 +82,8 @@ public: Result authenticate(const String & user_name, const String & password) const { - Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()}; + Poco::Net::HTTPRequest request{ + Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}; Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password}; basic_credentials.authenticate(request); From 45e06de3267486296cc1452c981a78688a2193ae Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:01:43 +0200 Subject: [PATCH 160/363] Minor update in Dynamic/JSON serializations --- src/DataTypes/Serializations/SerializationObject.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 2dd25e540cc..0042aa6d89d 100644 --- 
a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -199,7 +199,7 @@ void SerializationObject::serializeBinaryBulkStatePrefix( auto object_state = std::make_shared(serialization_version); object_state->max_dynamic_paths = column_object.getMaxDynamicPaths(); /// Write max_dynamic_paths parameter. - writeBinaryLittleEndian(object_state->max_dynamic_paths, *stream); + writeVarUInt(object_state->max_dynamic_paths, *stream); /// Write all dynamic paths in sorted order. object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); for (const auto & [path, _] : dynamic_paths) @@ -354,7 +354,7 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationObject::deserializeOb readBinaryLittleEndian(serialization_version, *structure_stream); auto structure_state = std::make_shared(serialization_version); /// Read max_dynamic_paths parameter. - readBinaryLittleEndian(structure_state->max_dynamic_paths, *structure_stream); + readVarUInt(structure_state->max_dynamic_paths, *structure_stream); /// Read the sorted list of dynamic paths. size_t dynamic_paths_size; readVarUInt(dynamic_paths_size, *structure_stream); From c85d5e753899503f93a8f9ca7b67776d386d9130 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:02:51 +0200 Subject: [PATCH 161/363] Update Dynamic serialization --- src/DataTypes/Serializations/SerializationDynamic.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6bba87c40fa..ab24779ced2 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -115,7 +115,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes(); /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types /// that is specified in the Dynamic type (we could decrease it before merge). - writeBinaryLittleEndian(dynamic_state->max_dynamic_types, *stream); + writeVarUInt(dynamic_state->max_dynamic_types, *stream); dynamic_state->variant_type = variant_info.variant_type; dynamic_state->variant_names = variant_info.variant_names; @@ -123,7 +123,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( /// Write information about variants. size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it. - writeBinaryLittleEndian(num_variants, *stream); + writeVarUInt(num_variants, *stream); if (settings.data_types_binary_encoding) { const auto & variants = assert_cast(*dynamic_state->variant_type).getVariants(); @@ -252,11 +252,11 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); /// Read max_dynamic_types parameter. - readBinaryLittleEndian(structure_state->max_dynamic_types, *structure_stream); + readVarUInt(structure_state->max_dynamic_types, *structure_stream); /// Read information about variants. DataTypes variants; size_t num_variants; - readBinaryLittleEndian(num_variants, *structure_stream); + readVarUInt(num_variants, *structure_stream); variants.reserve(num_variants + 1); /// +1 for shared variant. 
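/// The shared variant itself is never written here (see the write path above); it is accounted for separately, since every Dynamic column must carry it.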
if (settings.data_types_binary_encoding) { From 4f84c82d6d53ded0adda46aac1db1d345b5ba2eb Mon Sep 17 00:00:00 2001 From: Linh Giang <165205637+linhgiang24@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:02:44 -0600 Subject: [PATCH 162/363] Update grant.md to include POSTGRES privilege Added POSTGRES privilege under the SOURCES category as it seems to be missing. --- docs/en/sql-reference/statements/grant.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 43fa344a16d..6118f4c1d36 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -200,6 +200,7 @@ Hierarchy of privileges: - `JDBC` - `HDFS` - `S3` + - `POSTGRES` - [dictGet](#dictget) - [displaySecretsInShowAndSelect](#displaysecretsinshowandselect) - [NAMED COLLECTION ADMIN](#named-collection-admin) @@ -476,6 +477,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab - `JDBC`. Level: `GLOBAL` - `HDFS`. Level: `GLOBAL` - `S3`. Level: `GLOBAL` + - `POSTGRES`. Level: `GLOBAL` The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges. From a6d5047bb09640bcf99bef84f655602d7dfb3361 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 16 Aug 2024 21:29:46 +0100 Subject: [PATCH 163/363] impl --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 4 ++++ .../MergeTree/MergeTreeReadPoolBase.cpp | 6 +++++ .../MergeTreeReadPoolParallelReplicas.cpp | 5 +++++ ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 5 +++++ .../ParallelReplicasReadingCoordinator.cpp | 4 ++++ ...icas_read_task_size_overflow_bug.reference | 0 ...l_replicas_read_task_size_overflow_bug.sql | 22 +++++++++++++++++++ 7 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index a9b77fb6c03..7081eb716f5 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -395,6 +395,10 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ part_stat.prefetch_step_marks = std::max(part_stat.prefetch_step_marks, per_part_infos[i]->min_marks_per_task); + if (part_stat.prefetch_step_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 6d2560bc9c7..9d3c38822e1 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,6 +13,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( @@ -85,6 +86,11 @@ static size_t calculateMinMarksPerTask( min_marks_per_task = heuristic_min_marks; } } + + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely 
because of weird interference of settings)"); + LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 33eaf5a49bd..d23072771f2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( @@ -38,6 +39,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + extension.all_callback( InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 6b5cf978423..42ffc4304b2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrder( @@ -37,6 +38,10 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + for (const auto & part : parts_ranges) request.push_back({part.data_part->info, MarkRanges{}}); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f46b4de10b7..ee47fe3549a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1004,6 +1004,10 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request) { + if (request.min_number_of_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + ProfileEventTimeIncrement watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds); std::lock_guard lock(mutex); diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql new file mode 100644 index 00000000000..984c7fe0db7 --- /dev/null 
+++ b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 1; + +SYSTEM STOP MERGES test__fuzz_22; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); + +SET allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', + merge_tree_min_rows_for_concurrent_read = 9223372036854775806, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 9223372036854775806; + + SELECT v + FROM test__fuzz_22 +ORDER BY v + LIMIT 10, 10 +SETTINGS max_threads = 4 + FORMAT Null; -- { serverError BAD_ARGUMENTS } + +DROP TABLE test__fuzz_22 SYNC; From cc7d22a7b83440cfbf7d37086ece7fac222f24de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:08:16 +0200 Subject: [PATCH 164/363] Proper parsing of the PostgreSQL-style CAST operator --- src/Parsers/ExpressionElementParsers.cpp | 26 +++++++++++-------- ..._proper_parsing_of_cast_operator.reference | 4 +++ .../03227_proper_parsing_of_cast_operator.sql | 6 +++++ 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dd22b80b1cb..ffa1bd93ded 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -853,9 +853,9 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected /// Parse numbers (including decimals), strings, arrays and tuples of them. 
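 /// For example, all of the following are handled by this parser:
 ///   SELECT '414243'::String           -- a string literal (also exercised by the new test in this patch)
 ///   SELECT -123.45::Float64           -- a number with a leading minus
 ///   SELECT [1, 2, 3]::Array(UInt64)   -- an array literal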
+ Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; - bool is_string_literal = pos->type == StringLiteral; if (pos->type == Minus) { @@ -866,7 +866,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || is_string_literal) + else if (pos->type == Number || pos->type == StringLiteral) { ++pos; } @@ -939,18 +939,22 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (is_string_literal) + if (begin->type == StringLiteral) { - ReadBufferFromMemory buf(data_begin, data_size); - readQuotedStringWithSQLStyle(s, buf); - assert(buf.count() == data_size); + ASTPtr literal; + if (ParserStringLiteral().parse(begin, literal, expected)) + { + node = createFunctionCast(literal, type_ast); + return true; + } + return false; } else - s = String(data_begin, data_size); - - auto literal = std::make_shared(std::move(s)); - node = createFunctionCast(literal, type_ast); - return true; + { + auto literal = std::make_shared(String(data_begin, data_size)); + node = createFunctionCast(literal, type_ast); + return true; + } } return false; diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference new file mode 100644 index 00000000000..2127d396bb3 --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference @@ -0,0 +1,4 @@ +414243 +ABC +A +{"a": \'A\'} diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql new file mode 100644 index 00000000000..0c2e7dc582a --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql @@ -0,0 +1,6 @@ +SELECT '414243'::String; +SELECT x'414243'::String; +SELECT b'01000001'::String; +SELECT '{"a": \'\x41\'}'::String; +SELECT '{"a": \'\x4\'}'::String; -- { clientError SYNTAX_ERROR } +SELECT '{"a": \'a\x4\'}'::String; -- { clientError SYNTAX_ERROR } From aee031ad4468b870073dc46770d07cea07aa829f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:25:49 +0200 Subject: [PATCH 165/363] Slightly better --- src/Parsers/ExpressionElementParsers.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index ffa1bd93ded..726326bfc85 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -856,6 +856,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; + ASTPtr string_literal; if (pos->type == Minus) { @@ -866,10 +867,15 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || pos->type == StringLiteral) + else if (pos->type == Number) { ++pos; } + else if (pos->type == StringLiteral) + { + if (!ParserStringLiteral().parse(begin, string_literal, expected)) + return false; + } else if (isOneOf(pos->type)) { TokenType last_token = OpeningSquareBracket; @@ -939,15 +945,10 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (begin->type == 
StringLiteral) + if (string_literal) { - ASTPtr literal; - if (ParserStringLiteral().parse(begin, literal, expected)) - { - node = createFunctionCast(literal, type_ast); - return true; - } - return false; + node = createFunctionCast(string_literal, type_ast); + return true; } else { From d952f7cff579d28a51eea428aee7460121862ce5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:50:26 +0200 Subject: [PATCH 166/363] Update test --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index acb4925ce6e..ee702300094 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" From b98249ea7fda526a7a561862fcc4a721e5a4587f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:06:47 +0200 Subject: [PATCH 167/363] Use temporary tables for input and output in clickhouse-local --- programs/local/LocalServer.cpp | 2 +- tests/queries/0_stateless/01191_rename_dictionary.sql | 1 + .../02141_clickhouse_local_interactive_table.reference | 4 ++-- .../0_stateless/02141_clickhouse_local_interactive_table.sh | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 200beea7b63..a8b774562f9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -367,7 +367,7 @@ std::string LocalServer::getInitialCreateTableQuery() else table_structure = "(" + table_structure + ")"; - return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});", + return fmt::format("CREATE TEMPORARY TABLE {} {} ENGINE = File({}, {});", table_name, table_structure, data_format, table_file); } diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index c5012dabc81..be95e5a7d4b 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -27,6 +27,7 @@ RENAME DICTIONARY test_01191.t TO test_01191.dict1; -- {serverError INCORRECT_QU DROP DICTIONARY test_01191.t; -- {serverError INCORRECT_QUERY} DROP TABLE test_01191.t; +DROP DATABASE IF EXISTS dummy_db; CREATE DATABASE dummy_db ENGINE=Atomic; RENAME DICTIONARY test_01191.dict TO dummy_db.dict1; RENAME DICTIONARY dummy_db.dict1 TO test_01191.dict; diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index 0bb8966cbe4..0e74c0a083e 100644 --- 
a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') -CREATE TABLE foo.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh index 934d87616ac..3a95e59416a 100755 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' -$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' +$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' From 8ba142559ca05295670b0a899610ab613c2d5658 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:09:39 +0200 Subject: [PATCH 168/363] Pass-through RENAME and UUID-related operations in Overlay database to underlying databases --- src/Databases/DatabasesOverlay.cpp | 47 ++++++++++++++++++++++++++++++ src/Databases/DatabasesOverlay.h | 9 ++++++ src/Interpreters/StorageID.h | 1 - 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 801356b3dd7..495733e15fd 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -14,6 +14,8 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_TABLE; } DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_) @@ -124,6 +126,39 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab getEngineName()); } +void DatabasesOverlay::renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) +{ + for (auto & db : databases) + { + if (db->isTableExist(name, current_context)) + { + if (DatabasesOverlay * to_overlay_database = typeid_cast(&to_database)) + { + /// Renaming from Overlay database inside itself or into another Overlay database. + /// Just use the first database in the overlay as a destination. 
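+                    /// For example, for RENAME TABLE overlay_db.t TO overlay_db.t2 the source is whichever
+                    /// member database currently holds the table, while the destination resolves to the
+                    /// first member database of the target Overlay.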
+ if (to_overlay_database->databases.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The destination Overlay database {} does not have any members", to_database.getDatabaseName()); + + db->renameTable(current_context, name, *to_overlay_database->databases[0], to_name, exchange, dictionary); + } + else + { + /// Renaming into a different type of database. E.g. from Overlay on top of Atomic database into just Atomic database. + db->renameTable(current_context, name, to_database, to_name, exchange, dictionary); + } + + return; + } + } + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(getDatabaseName()), backQuote(name)); +} + ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const { ASTPtr result = nullptr; @@ -178,6 +213,18 @@ String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const return result; } +UUID DatabasesOverlay::getUUID() const +{ + UUID result = UUIDHelpers::Nil; + for (const auto & db : databases) + { + result = db->getUUID(); + if (result != UUIDHelpers::Nil) + break; + } + return result; +} + UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const { UUID result = UUIDHelpers::Nil; diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index b0c7e7e4032..40c653e5cb5 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -35,12 +35,21 @@ public: StoragePtr detachTable(ContextPtr context, const String & table_name) override; + void renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) override; + ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override; ASTPtr getCreateDatabaseQuery() const override; String getTableDataPath(const String & table_name) const override; String getTableDataPath(const ASTCreateQuery & query) const override; + UUID getUUID() const override; UUID tryGetTableUUID(const String & table_name) const override; void drop(ContextPtr context) override; diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f9afbc7b98d..ad55d16e284 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -27,7 +27,6 @@ class ASTQueryWithTableAndOutput; class ASTTableIdentifier; class Context; -// TODO(ilezhankin): refactor and merge |ASTTableIdentifier| struct StorageID { String database_name; From da0a8051d8c8e8c2c72145e15cdf5a96e99641d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:22:57 +0200 Subject: [PATCH 169/363] Miscellaneous changes in database engines --- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 10 +++++----- src/Databases/DatabaseOnDisk.h | 4 ++-- src/Databases/DatabaseOrdinary.cpp | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 3fb6d30fcb8..2ccdd8510a8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -52,7 +52,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { - iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) + iterateMetadataFiles([this, &local_context](const String & file_name) { 
const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 41cfb751141..aeac130594f 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -12,7 +12,7 @@ class DatabaseLazyIterator; class Context; /** Lazy engine of databases. - * Works like DatabaseOrdinary, but stores in memory only the cache. + * Works like DatabaseOrdinary, but stores only recently accessed tables in memory. * Can be used only with *Log engines. */ class DatabaseLazy final : public DatabaseOnDisk diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..c80e4def94e 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -568,14 +568,14 @@ void DatabaseOnDisk::drop(ContextPtr local_context) assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) { - (void)fs::remove_all(local_context->getPath() + getDataPath()); + (void)fs::remove_all(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove_all(getMetadataPath()); } else { try { - (void)fs::remove(local_context->getPath() + getDataPath()); + (void)fs::remove(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove(getMetadataPath()); } catch (const fs::filesystem_error & e) @@ -613,7 +613,7 @@ time_t DatabaseOnDisk::getObjectMetadataModificationTime(const String & object_n } } -void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const IteratingFunction & process_metadata_file) const +void DatabaseOnDisk::iterateMetadataFiles(const IteratingFunction & process_metadata_file) const { auto process_tmp_drop_metadata_file = [&](const String & file_name) { @@ -621,7 +621,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat static const char * tmp_drop_ext = ".sql.tmp_drop"; const std::string object_name = file_name.substr(0, file_name.size() - strlen(tmp_drop_ext)); - if (fs::exists(local_context->getPath() + getDataPath() + '/' + object_name)) + if (fs::exists(std::filesystem::path(getContext()->getPath()) / data_path / object_name)) { fs::rename(getMetadataPath() + file_name, getMetadataPath() + object_name + ".sql"); LOG_WARNING(log, "Object {} was not dropped previously and will be restored", backQuote(object_name)); @@ -638,7 +638,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat std::vector> metadata_files; fs::directory_iterator dir_end; - for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it(metadata_path); dir_it != dir_end; ++dir_it) { String file_name = dir_it->path().filename(); /// For '.svn', '.gitignore' directory and similar. 
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index 12656068643..ffc95a7c128 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -64,7 +64,7 @@ public: time_t getObjectMetadataModificationTime(const String & object_name) const override; String getDataPath() const override { return data_path; } - String getTableDataPath(const String & table_name) const override { return data_path + escapeForFileName(table_name) + "/"; } + String getTableDataPath(const String & table_name) const override { return std::filesystem::path(data_path) / escapeForFileName(table_name) / ""; } String getTableDataPath(const ASTCreateQuery & query) const override { return getTableDataPath(query.getTable()); } String getMetadataPath() const override { return metadata_path; } @@ -83,7 +83,7 @@ protected: using IteratingFunction = std::function; - void iterateMetadataFiles(ContextPtr context, const IteratingFunction & process_metadata_file) const; + void iterateMetadataFiles(const IteratingFunction & process_metadata_file) const; ASTPtr getCreateTableQueryImpl( const String & table_name, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8808261654f..dd8a3f42ea8 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -55,7 +55,7 @@ static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr const char * const CONVERT_TO_REPLICATED_FLAG_NAME = "convert_to_replicated"; DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) + : DatabaseOrdinary(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseOrdinary (" + name_ + ")", context_) { } @@ -265,7 +265,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables } }; - iterateMetadataFiles(local_context, process_metadata); + iterateMetadataFiles(process_metadata); size_t objects_in_database = metadata.parsed_tables.size() - prev_tables_count; size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries; From 07fa798ffa53216fd37dac51bceb327665fc8dda Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 16 Aug 2024 23:31:41 +0000 Subject: [PATCH 170/363] add total in system.one, test --- src/Storages/System/StorageSystemOne.cpp | 5 ++++- .../System/StorageSystemViewRefreshes.cpp | 2 +- ...3221_refreshable_matview_progress.reference | 2 ++ .../03221_refreshable_matview_progress.sql | 18 ++++++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.reference create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.sql diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 936d55e61a0..70377715dc3 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -41,7 +41,10 @@ Pipe StorageSystemOne::read( auto column = DataTypeUInt8().createColumnConst(1, 0u)->convertToFullColumnIfConst(); Chunk chunk({ std::move(column) }, 1); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + auto source = std::make_shared(std::move(header), std::move(chunk)); + source->addTotalRowsApprox(1); + + return Pipe(source); } diff --git 
a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 3941c4c39c2..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -86,7 +86,7 @@ void StorageSystemViewRefreshes::fillData( res_columns[i++]->insert(refresh.exception_message); res_columns[i++]->insert(refresh.refresh_count); - res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0)); + res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read); res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9); res_columns[i++]->insert(refresh.progress.read_rows); res_columns[i++]->insert(refresh.progress.read_bytes); diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.reference b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference new file mode 100644 index 00000000000..5ed392e61c7 --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference @@ -0,0 +1,2 @@ +0 +4 4 1 diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql new file mode 100644 index 00000000000..4794359fd2b --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -0,0 +1,18 @@ +set allow_experimental_refreshable_materialized_view=1; + +CREATE MATERIALIZED VIEW 03221_rmv +REFRESH AFTER 1 SECOND +( +x UInt64 +) +ENGINE = Memory +AS SELECT number AS x +FROM numbers(3) +UNION ALL +SELECT rand64() AS x; + +SELECT sleep(2); + +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; + +DROP TABLE 03221_rmv; From 142d7b15828c9eeef145fd56d56b33d1004fb68c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:35:49 +0200 Subject: [PATCH 171/363] Miscellaneous changes in BaseDaemon --- src/Daemon/BaseDaemon.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index e7ae8ea5a1d..f75699881bd 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -16,39 +16,29 @@ #include #if defined(OS_LINUX) - #include +#include #endif #include #include #include - #include #include #include -#include #include -#include #include #include #include #include #include - #include #include #include -#include #include -#include #include -#include #include -#include #include -#include -#include #include #include #include @@ -459,13 +449,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance().getBuildIDHex(); - if (build_id_hex.empty()) - build_id = ""; - else - build_id = build_id_hex; -#else - build_id = ""; + build_id = SymbolIndex::instance().getBuildIDHex(); #endif git_hash = GIT_HASH; From 83608cb7bfcbf0801994cc916ed36b91c7113307 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:40:07 +0200 Subject: [PATCH 172/363] Miscellaneous changes from #66999 (2) --- base/base/CMakeLists.txt | 3 +++ {src/Daemon => base/base}/GitHash.cpp.in | 0 src/Daemon/CMakeLists.txt | 3 --- 3 files changed, 3 insertions(+), 3 deletions(-) rename {src/Daemon => base/base}/GitHash.cpp.in (100%) diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 
247028b96e0..3d236f52c36 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -8,6 +8,8 @@ endif () # when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16. set (CMAKE_CXX_STANDARD 20) +configure_file(GitHash.cpp.in GitHash.generated.cpp) + set (SRCS argsToConfig.cpp cgroupsv2.cpp @@ -33,6 +35,7 @@ set (SRCS safeExit.cpp throwError.cpp Numa.cpp + GitHash.generated.cpp ) add_library (common ${SRCS}) diff --git a/src/Daemon/GitHash.cpp.in b/base/base/GitHash.cpp.in similarity index 100% rename from src/Daemon/GitHash.cpp.in rename to base/base/GitHash.cpp.in diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 35ea2122dbb..2068af2200d 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,10 +1,7 @@ -configure_file(GitHash.cpp.in GitHash.generated.cpp) - add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - GitHash.generated.cpp ) target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) From 4b68ba23c0372331401cd327ca5849ba9d1bce8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:43:53 +0200 Subject: [PATCH 173/363] Pretty print tuples in CREATE TABLE statements --- programs/format/Format.cpp | 6 +++++- src/Client/ClientBase.cpp | 6 +++++- src/Core/ExternalTable.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/DataTypes/IDataType.cpp | 1 - src/Databases/DatabaseOnDisk.cpp | 2 +- src/Functions/formatQuery.cpp | 8 +++++++- .../InterpreterShowCreateQuery.cpp | 5 ++++- .../formatWithPossiblyHidingSecrets.h | 3 ++- src/Parsers/ASTColumnDeclaration.cpp | 8 ++------ src/Parsers/ASTDataType.cpp | 19 ++++++++++++++----- src/Parsers/ASTExpressionList.cpp | 5 ++--- src/Parsers/ASTNameTypePair.cpp | 6 +----- src/Parsers/IAST.cpp | 3 ++- src/Parsers/IAST.h | 14 +++++++++----- 15 files changed, 57 insertions(+), 35 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index a434c9171e9..4af77533c53 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -264,7 +264,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (!backslash) { WriteBufferFromOwnString str_buf; - formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length); + oneline = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline, hilite); + settings.show_secrets = true; + settings.print_pretty_type_names = !oneline; + res->format(settings); if (insert_query_payload) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 187ef079eda..74357d33f1c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -331,7 +331,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting { output_stream << std::endl; WriteBufferFromOStream res_buf(output_stream, 4096); - formatAST(*res, res_buf); + IAST::FormatSettings format_settings(res_buf, /* one_line */ false); + format_settings.hilite = true; + format_settings.show_secrets = true; + format_settings.print_pretty_type_names = true; + res->format(format_settings); res_buf.finalize(); output_stream << std::endl << std::endl; } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index c2bcf6ec651..4ff0d7092d8 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -85,7 +85,7 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg /// We use 
`formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty. if (column) - structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -102,7 +102,7 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error); for (size_t i = 0; i < type_list_raw->children.size(); ++i) - structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false)); } void BaseExternalTable::initSampleBlock() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0d84ad9022a..1a71494c8cd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 7fd8a85aeca..49e5b2d022e 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..4e1ddd8cc77 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -504,7 +504,7 @@ void DatabaseOnDisk::renameTable( } -/// It returns create table statement (even if table is detached) +/// It returns the create table statement (even if table is detached) ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, ContextPtr, bool throw_on_error) const { ASTPtr ast; diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 9591ea95254..be633bdfe37 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -43,6 +43,7 @@ public: max_query_size = settings.max_query_size; max_parser_depth = 
settings.max_parser_depth; max_parser_backtracks = settings.max_parser_backtracks; + print_pretty_type_names = settings.print_pretty_type_names; } String getName() const override { return name; } @@ -138,7 +139,11 @@ private: } } - formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + IAST::FormatSettings settings(buf, output_formatting == OutputFormatting::SingleLine, /*hilite*/ false); + settings.show_secrets = true; + settings.print_pretty_type_names = print_pretty_type_names; + ast->format(settings); + auto formatted = buf.stringView(); const size_t res_data_new_size = res_data_size + formatted.size() + 1; @@ -165,6 +170,7 @@ private: size_t max_query_size; size_t max_parser_depth; size_t max_parser_backtracks; + bool print_pretty_type_names; }; } diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index e5549b2e539..7af6b4948f8 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,7 +97,10 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({.ctx = getContext(), .query = *create_query, .one_line = false})); + column->insert(format({ + .ctx = getContext(), + .query = *create_query, + .one_line = false})); return QueryPipeline(std::make_shared(Block{{ std::move(column), diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index ea8c295b169..14e84f1d1a4 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -25,7 +25,8 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getSettingsRef().format_display_secrets_in_show_and_select && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); - return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); + return settings.query.formatWithPossiblyHidingSensitiveData( + settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names); } } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c96499095d5..23d653012f8 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -66,17 +66,13 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo { frame.need_parens = false; - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. + /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query. format_settings.ostr << backQuote(name); if (type) { format_settings.ostr << ' '; - - FormatStateStacked type_frame = frame; - type_frame.indent = 0; - - type->formatImpl(format_settings, state, type_frame); + type->formatImpl(format_settings, state, frame); } if (null_modifier) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 3c17ae8c380..21f56e5f7a2 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -40,12 +40,21 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat { settings.ostr << '(' << (settings.hilite ? 
hilite_none : ""); - for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + if (!settings.one_line && settings.print_pretty_type_names && name == "Tuple") { - if (i != 0) - settings.ostr << ", "; - - arguments->children[i]->formatImpl(settings, state, frame); + ++frame.indent; + std::string indent_str = settings.one_line ? "" : "\n" + std::string(4 * frame.indent, ' '); + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ','; + settings.ostr << indent_str; + arguments->children[i]->formatImpl(settings, state, frame); + } + } + else + { + arguments->formatImpl(settings, state, frame); } settings.ostr << (settings.hilite ? hilite_function : "") << ')'; diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 61ac482af82..f345b0c6a6f 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -42,7 +42,8 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + ++frame.indent; + std::string indent_str = "\n" + std::string(4 * frame.indent, ' '); if (frame.expression_list_prepend_whitespace) { @@ -50,8 +51,6 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For settings.ostr << ' '; } - ++frame.indent; - for (size_t i = 0, size = children.size(); i < size; ++i) { if (i && separator) diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp index e4066081a9b..1515700365f 100644 --- a/src/Parsers/ASTNameTypePair.cpp +++ b/src/Parsers/ASTNameTypePair.cpp @@ -23,12 +23,8 @@ ASTPtr ASTNameTypePair::clone() const void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; + settings.ostr << backQuoteIfNeed(name) << ' '; type->formatImpl(settings, state, frame); } } - - diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 37d7f458d61..5bd2c92c60a 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,11 +165,12 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const +String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const { WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; + settings.print_pretty_type_names = print_pretty_type_names; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index e2cf7579667..2293d50b0ec 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. 
LiteralEscapingStyle literal_escaping_style; + bool print_pretty_type_names; explicit FormatSettings( WriteBuffer & ostr_, @@ -209,7 +210,8 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, + bool print_pretty_type_names_ = false) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -218,6 +220,7 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) + , print_pretty_type_names(print_pretty_type_names_) { } @@ -230,6 +233,7 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) + , print_pretty_type_names(other.print_pretty_type_names) { } @@ -251,7 +255,7 @@ public: /// The state that is copied when each node is formatted. For example, nesting level. struct FormatStateStacked { - UInt8 indent = 0; + UInt16 indent = 0; bool need_parens = false; bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) @@ -274,7 +278,7 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; + String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const; /** formatForLogging and formatForErrorMessage always hide secrets. 
This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about @@ -283,12 +287,12 @@ public: */ String formatForLogging(size_t max_length = 0) const { - return formatWithPossiblyHidingSensitiveData(max_length, true, false); + return formatWithPossiblyHidingSensitiveData(max_length, true, false, false); } String formatForErrorMessage() const { - return formatWithPossiblyHidingSensitiveData(0, true, false); + return formatWithPossiblyHidingSensitiveData(0, true, false, false); } virtual bool hasSecretParts() const { return childrenHaveSecretParts(); } From 566e043c2c84a979ca1c05996f6e3a4303708bff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:44:36 +0200 Subject: [PATCH 174/363] Add a test --- ...print_pretty_tuples_create_query.reference | 56 +++++++++++++++++++ .../03227_print_pretty_tuples_create_query.sh | 35 ++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference create mode 100755 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference new file mode 100644 index 00000000000..c65dc32a224 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -0,0 +1,56 @@ + +SHOW CREATE TABLE: +CREATE TABLE test.test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 +CREATE TABLE test.test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 + +clickhouse-format: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test (`x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String) ORDER BY tuple() + +formatQuery: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ORDER BY tuple() diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh new file mode 100755 index 00000000000..e5614f9f228 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo +echo "SHOW CREATE TABLE:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY (); + SET print_pretty_type_names = 1; + SHOW CREATE TABLE test; + SET print_pretty_type_names = 0; + SHOW CREATE TABLE test; + DROP TABLE test; +" + +echo +echo "clickhouse-format:" +${CLICKHOUSE_FORMAT} --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" +${CLICKHOUSE_FORMAT} --oneline --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" + +echo +echo "formatQuery:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 1; + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 0; +" From 3065b386a4e033a802bbc4855d5113e814648848 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:49:45 +0200 Subject: [PATCH 175/363] Update documentation --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1a71494c8cd..d8837d26e54 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. 
Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function, as well as in the `SHOW CREATE TABLE` query and the `formatQuery` function.", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ From 02ec4e2f92ac769f92aebdb714d0d8da1a924984 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 04:00:31 +0200 Subject: [PATCH 176/363] Fix build --- src/Parsers/ExpressionElementParsers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 726326bfc85..61b5723072e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -943,7 +943,6 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (ParserToken(DoubleColon).ignore(pos, expected) && ParserDataType().parse(pos, type_ast, expected)) { - String s; size_t data_size = data_end - data_begin; if (string_literal) { From 7a5df67b3b3b5bd7a8481562e0293150427fef90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 05:08:58 +0200 Subject: [PATCH 177/363] Fix style --- src/Interpreters/InterpreterShowCreateQuery.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 7af6b4948f8..3de6b755609 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,10 +97,12 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({ + column->insert(format( + { .ctx = getContext(), .query = *create_query, - .one_line = false})); + .one_line = false + })); return QueryPipeline(std::make_shared(Block{{ std::move(column), From 330086c621c860524b68ce7598d12c8db958101d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 03:46:33 +0000 Subject: [PATCH 178/363] update 02136_scalar_progress results according to fixed bug --- tests/queries/0_stateless/02136_scalar_progress.reference | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02136_scalar_progress.reference b/tests/queries/0_stateless/02136_scalar_progress.reference index 5378c52de89..b8957f78e6d 100644 --- a/tests/queries/0_stateless/02136_scalar_progress.reference +++ b/tests/queries/0_stateless/02136_scalar_progress.reference @@ -1,6 +1,7 @@ < X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"65505","read_bytes":"524040","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: 
{"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} -< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Progress: {"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} From 00891a2dd8f69dde0b1fe364b6891fd3629d5dbe Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 13:57:24 +0000 Subject: [PATCH 179/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 4794359fd2b..30228277bb5 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,3 +1,5 @@ +-- Tags: no-ordinary-database + set allow_experimental_refreshable_materialized_view=1; CREATE MATERIALIZED VIEW 03221_rmv From 9dee9ecfb4adee4cff099f13afe61bdc2c38170e Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 17 Aug 2024 10:33:35 -0600 Subject: [PATCH 180/363] Fix incorrect default value for postgresql_connection_pool_auto_close_connection in docs --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de601fe02dc..5bf1fe197ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1381,7 +1381,7 @@ Default value: `2`. Close connection before returning connection to the pool. -Default value: true. +Default value: false. 
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} From a42b12725ab37df74a96e85f7c644c90bd6e30f6 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 17:39:09 +0200 Subject: [PATCH 181/363] CI: Native build for package_aarch64 --- tests/ci/ci_config.py | 3 ++- tests/ci/ci_definitions.py | 1 + tests/ci/test_ci_config.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7a19eb6f827..173c6c9c931 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -94,7 +94,8 @@ class CI: package_type="deb", static_binary_name="aarch64", additional_pkgs=True, - ) + ), + runner_type=Runners.BUILDER_ARM, ), BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48847b0d7a6..1bed9db06f2 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -57,6 +57,7 @@ class Runners(metaclass=WithIter): """ BUILDER = "builder" + BUILDER_ARM = "builder-aarch64" STYLE_CHECKER = "style-checker" STYLE_CHECKER_ARM = "style-checker-aarch64" FUNC_TESTER = "func-tester" diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 525b3bf367b..c3e55aeac06 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -35,10 +35,16 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] must have style-checker(-aarch64) runner", ) elif "binary_" in job.lower() or "package_" in job.lower(): - self.assertTrue( - CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, - f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", - ) + if job.lower() == CI.BuildNames.PACKAGE_AARCH64: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,), + f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner", + ) + else: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER,), + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) elif "aarch64" in job.lower(): self.assertTrue( "aarch" in CI.JOB_CONFIGS[job].runner_type, From 7432400fd0c07b7c967f47b1536706b8f791fcb1 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 21:06:58 +0200 Subject: [PATCH 182/363] revert hacks made to prevent OOM in aarch64 --- cmake/limit_jobs.cmake | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 17d8dd42a2c..8e48fc9b9d8 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,19 +42,9 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) - if (ARCH_AARCH64) - # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") - set (PARALLEL_LINK_JOBS 1) - if (LINKER_NAME MATCHES "lld") - math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") - endif() - elseif (PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) - endif () +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND 
ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") From cb8d9a05643d3aac5f410c4eac53124224c63bc8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 17 Aug 2024 20:13:35 +0200 Subject: [PATCH 183/363] fix typo --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cd6fd9ab839..a03394be226 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4952,7 +4952,7 @@ toIntervalMonth(n) **Arguments** -- `n` — Number of m. Positive integer number. [Int*](../data-types/int-uint.md). +- `n` — Number of months. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** From ae389d14ee65ff5fea3543868b6b161f9fcb806e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 17 Aug 2024 20:42:00 +0200 Subject: [PATCH 184/363] Fix stylecheck --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 54352b88578..e8eab3b4453 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -333,7 +333,7 @@ namespace type = variant_type; type_indexes = {TypeIndex::Variant}; } - + /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type, /// otherwise, convert all Date, DateTime and DateTime64 to String. 
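    /// Illustrative sketch of the rule above (assumed examples, not an exhaustive spec):
    ///   {Date, DateTime}      -> {DateTime, DateTime}
    ///   {Date, DateTime64(9)} -> {DateTime64(9), DateTime64(9)}
    ///   {Date, Int64}         -> {String, Int64}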
void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) From c68b597eee752d5921d4469af01104a27681296a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 20:03:18 +0000 Subject: [PATCH 185/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 30228277bb5..1be276c485c 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -15,6 +15,6 @@ SELECT rand64() AS x; SELECT sleep(2); -SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; DROP TABLE 03221_rmv; From a36688b07387822cae18eca1fbc798a517119e02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:20 +0200 Subject: [PATCH 186/363] Fix error --- src/Parsers/ASTDataType.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 21f56e5f7a2..4211347fb74 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -54,6 +54,7 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat } else { + frame.expression_list_prepend_whitespace = false; arguments->formatImpl(settings, state, frame); } From 3c021e02b69f9237d9765d6295fd843ad2503398 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:26 +0200 Subject: [PATCH 187/363] Fix tests --- .../0_stateless/01458_named_tuple_millin.reference | 8 ++++++-- .../01504_compression_multiple_streams.reference | 8 ++++---- ...01548_create_table_compound_column_format.reference | 3 ++- .../01881_aggregate_functions_versioning.reference | 2 +- .../02117_show_create_table_system.reference | 10 ++++++++-- .../02286_tuple_numeric_identifier.reference | 2 +- .../02907_backup_restore_flatten_nested.reference | 4 ++-- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference index 954dfe36563..86561570985 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.reference +++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference @@ -1,12 +1,16 @@ CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.reference b/tests/queries/0_stateless/01504_compression_multiple_streams.reference index 4d3aba66526..14cdce72044 100644 --- a/tests/queries/0_stateless/01504_compression_multiple_streams.reference +++ b/tests/queries/0_stateless/01504_compression_multiple_streams.reference @@ -1,20 +1,20 @@ 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = 
MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference index 21e31e8f034..c23cc57548b 100644 --- 
a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference @@ -7,6 +7,7 @@ ENGINE = TinyLog CREATE TABLE test ( `a` Int64, - `b` Tuple(a Int64) + `b` Tuple( + a Int64) ) ENGINE = TinyLog diff --git a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference index c30c4ca7e74..e15f312c2c8 100644 --- a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference +++ b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference @@ -1 +1 @@ -CREATE TABLE default.test_table\n(\n `col1` DateTime,\n `col2` Int64,\n `col3` AggregateFunction(1, sumMap, Tuple(Array(UInt8), Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test_table\n(\n `col1` DateTime,\n `col2` Int64,\n `col3` AggregateFunction(1, sumMap, Tuple(\n Array(UInt8),\n Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 32e8b2f4312..638a46a142f 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -510,9 +510,15 @@ CREATE TABLE system.parts `rows_where_ttl_info.max` Array(DateTime), `projections` Array(String), `visible` UInt8, - `creation_tid` Tuple(UInt64, UInt64, UUID), + `creation_tid` Tuple( + UInt64, + UInt64, + UUID), `removal_tid_lock` UInt64, - `removal_tid` Tuple(UInt64, UInt64, UUID), + `removal_tid` Tuple( + UInt64, + UInt64, + UUID), `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference index 21348493d1d..916cdaf83cd 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(`1` Tuple(`2` Int32, `3` Int32), `4` Int32)\n)\nENGINE = Memory +CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(\n `1` Tuple(\n `2` Int32,\n `3` Int32),\n `4` Int32)\n)\nENGINE = Memory {"t":{"1":{"2":2,"3":3},"4":4}} 2 3 4 2 3 4 diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference index aa8f22f590a..0db19f0591a 100644 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference @@ -1,8 +1,8 @@ BACKUP_CREATED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 BACKUP_CREATED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS 
index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 From 60dd7e962a19e92cef4e3ab40d2607b3f5a59e90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 05:56:27 +0200 Subject: [PATCH 188/363] Fix tests --- programs/format/Format.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 4af77533c53..f07387bd395 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -264,10 +264,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (!backslash) { WriteBufferFromOwnString str_buf; - oneline = oneline || approx_query_length < max_line_length; - IAST::FormatSettings settings(str_buf, oneline, hilite); + bool oneline_current_query = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline_current_query, hilite); settings.show_secrets = true; - settings.print_pretty_type_names = !oneline; + settings.print_pretty_type_names = !oneline_current_query; res->format(settings); if (insert_query_payload) @@ -311,7 +311,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) else { WriteBufferFromOwnString str_buf; - formatAST(*res, str_buf, hilite, oneline); + bool oneline_current_query = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline_current_query, hilite); + settings.show_secrets = true; + settings.print_pretty_type_names = !oneline_current_query; + res->format(settings); auto res_string = str_buf.str(); WriteBufferFromOStream res_cout(std::cout, 4096); From 1691e4c4977dbaf44bedf61cdf248e7a1997d407 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 06:14:21 +0200 Subject: [PATCH 189/363] Fix test --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index ee702300094..fbd7d897fb8 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -8,7 +8,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \ + --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsObject" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" @@ -16,7 +17,7 @@ ${CLICKHOUSE_CLIENT} -q \ "SELECT data.repo.name, count() AS stars FROM ghdata \ WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5" -${CLICKHOUSE_CLIENT} --allow_experimental_analyzer=1 -q \ +${CLICKHOUSE_CLIENT} --enable_analyzer=1 -q \ "SELECT 
data.payload.commits[].author.name AS name, count() AS c FROM ghdata \ ARRAY JOIN data.payload.commits[].author.name \ GROUP BY name ORDER BY c DESC, name LIMIT 5" From 7f2c61799dc984add3056f9c77c4e64476ad917b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 06:14:59 +0200 Subject: [PATCH 190/363] Fix test --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index ef87034ff89..2afec5ba7fe 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -13,10 +13,10 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree OR ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString" -${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string" +${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string" ${CLICKHOUSE_CLIENT} -q "SELECT \ (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \ From d34d41d1e240f0df6256a827661174a30011f204 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Aug 2024 04:34:38 +0000 Subject: [PATCH 191/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 1be276c485c..de8de41fd04 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -16,5 +16,3 @@ SELECT rand64() AS x; SELECT sleep(2); SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; - -DROP TABLE 03221_rmv; From 03ab872f5c628613e0e187d4e1ad3ca9b9148bf6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 07:34:44 +0200 Subject: [PATCH 192/363] Fix error --- src/IO/S3/PocoHTTPClient.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 3b7ec4d1d9c..eb65460ce13 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -20,6 +20,7 @@ #include #include + namespace Aws::Http::Standard { class StandardHttpResponse; @@ -27,15 +28,17 @@ class StandardHttpResponse; namespace DB { - class Context; } + namespace DB::S3 { + class ClientFactory; class PocoHTTPClient; + struct PocoHTTPClientConfiguration : public 
Aws::Client::ClientConfiguration
{
    std::function per_request_configuration;
@@ -76,6 +79,7 @@ private:
     friend ClientFactory;
 };

+
 class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse
 {
 public:
@@ -115,6 +119,7 @@ private:
     Aws::Utils::Stream::ResponseStream body_stream;
 };

+
 class PocoHTTPClient : public Aws::Http::HttpClient
 {
 public:
@@ -170,10 +175,10 @@ protected:
     std::function error_report;
     ConnectionTimeouts timeouts;
     const RemoteHostFilter & remote_host_filter;
-    unsigned int s3_max_redirects;
+    unsigned int s3_max_redirects = 0;
     bool s3_use_adaptive_timeouts = true;
     bool enable_s3_requests_logging = false;
-    bool for_disk_s3;
+    bool for_disk_s3 = false;

     /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler
     /// (i.e. throttles GetObject, HeadObject)

From 4e91c663a62529d27ddcf6d364338f001e1706eb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 08:28:16 +0200
Subject: [PATCH 193/363] Fix error

---
 src/IO/S3/PocoHTTPClient.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp
index dc7dcdc6793..3e060e21c51 100644
--- a/src/IO/S3/PocoHTTPClient.cpp
+++ b/src/IO/S3/PocoHTTPClient.cpp
@@ -397,8 +397,11 @@ void PocoHTTPClient::makeRequestInternalImpl(

     try
     {
-        const auto proxy_configuration = per_request_configuration();
-        for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt)
+        ProxyConfiguration proxy_configuration;
+        if (per_request_configuration)
+            proxy_configuration = per_request_configuration();
+
+        for (size_t attempt = 0; attempt <= s3_max_redirects; ++attempt)
         {
             Poco::URI target_uri(uri);

@@ -516,7 +519,6 @@ void PocoHTTPClient::makeRequestInternalImpl(
                     LOG_TEST(log, "Redirecting request to new location: {}", location);

                     addMetric(request, S3MetricType::Redirects);
-
                     continue;
                 }

@@ -564,9 +566,9 @@ void PocoHTTPClient::makeRequestInternalImpl(
             }
             else
             {
-
                 if (status_code == 429 || status_code == 503)
-                { // API throttling
+                {
+                    /// API throttling
                     addMetric(request, S3MetricType::Throttling);
                 }
                 else if (status_code >= 300)

From 7071942858e44053b92b8386e68251be7718e3b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:05:45 +0200
Subject: [PATCH 194/363] Miscellaneous changes from #66999

---
 programs/keeper/Keeper.cpp | 4 +++-
 src/Daemon/BaseDaemon.cpp  | 4 +---
 src/Daemon/BaseDaemon.h    | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index a447a9e50f6..ced661d9772 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -66,6 +66,8 @@
 /// A minimal file used when the keeper is run without installation
 INCBIN(keeper_resource_embedded_xml, SOURCE_DIR "/programs/keeper/keeper_embedded.xml");

+extern const char * GIT_HASH;
+
 int mainEntryClickHouseKeeper(int argc, char ** argv)
 {
     DB::Keeper app;
@@ -675,7 +677,7 @@ void Keeper::logRevision() const
         "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}",
         VERSION_STRING,
         ClickHouseRevision::getVersionRevision(),
-        git_hash.empty() ? "" : git_hash,
+        GIT_HASH,
         build_id.empty() ? 
"" : build_id, getpid()); } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f75699881bd..c42bf7641d2 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -452,8 +452,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() build_id = SymbolIndex::instance().getBuildIDHex(); #endif - git_hash = GIT_HASH; - #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -466,7 +464,7 @@ void BaseDaemon::logRevision() const { logger().information("Starting " + std::string{VERSION_FULL} + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", git hash: " + std::string(GIT_HASH) + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index b15aa74fcf3..a6efa94a567 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -165,7 +165,6 @@ protected: Poco::Util::AbstractConfiguration * last_configuration = nullptr; String build_id; - String git_hash; String stored_binary_hash; bool should_setup_watchdog = false; From 2a48aaad561f52539edbede94015c35e264bd344 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:12:49 +0200 Subject: [PATCH 195/363] Fix build --- src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index fe87eed5570..decc16a3704 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -786,7 +786,7 @@ static std::tuple executeQueryImpl( /// Verify that AST formatting is consistent: /// If you format AST, parse it back, and format it again, you get the same string. - String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false); /// The query can become more verbose after formatting, so: size_t new_max_query_size = max_query_size > 0 ? 
(1000 + 2 * max_query_size) : 0; @@ -811,7 +811,7 @@ static std::tuple executeQueryImpl( chassert(ast2); - String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false); if (formatted1 != formatted2) throw Exception(ErrorCodes::LOGICAL_ERROR, From 4f7e3e8374acd98496092e8f8a219af6755a2f70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:38:00 +0200 Subject: [PATCH 196/363] Fix test 01017_uniqCombined_memory_usage --- .../0_stateless/01017_uniqCombined_memory_usage.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index c13a0859183..eca370d94af 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -7,7 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 -SET use_uncompressed_cache = 0; +SET use_uncompressed_cache = 0; +SET memory_profiler_step = 1; -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; @@ -31,14 +32,14 @@ SELECT 'K=16'; SELECT 'UInt32'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5230000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements SELECT 'UInt64'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5900000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); SELECT 'K=18'; From ec8554c85aeae5dcc8367ce09d093c5526ef1d47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:41:29 +0200 Subject: [PATCH 197/363] Fix build --- src/Common/SignalHandlers.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp index c4358da2453..6ac6cbcae29 100644 --- a/src/Common/SignalHandlers.cpp +++ b/src/Common/SignalHandlers.cpp @@ -18,13 +18,17 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int CANNOT_SEND_SIGNAL; } + } +extern const char * GIT_HASH; + using namespace DB; @@ -334,7 +338,7 @@ void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) co size_t pos = message.find('\n'); LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos)); + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. 
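    /// (An assumed illustration: for a message "a\nb\nc", the loop below advances pos
    /// from one '\n' to the next and calls LOG_FATAL once per segment, so a plain
    /// line-oriented grep over the log still matches every line of the message.)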
while (pos != std::string_view::npos)
@@ -368,7 +372,7 @@ try
     LOG_FATAL(log, "########## Short fault info ############");
     LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
-        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
         thread_num, sig);

     std::string signal_description = "Unknown signal";
@@ -434,13 +438,13 @@ try
     if (query_id.empty())
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, signal_description, sig);
     }
     else
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, query_id, query, signal_description, sig);
     }

From d6e170f77704833fa6655820d55090ba18b0b9fe Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Sat, 17 Aug 2024 18:31:11 +0800
Subject: [PATCH 198/363] Repeated fields are also compound types; ignore them

---
 src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index c6167e572df..7b5c29e321f 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -286,6 +286,9 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa
         if (!s)
             continue;

+        if(s->descr()->schema_node()->is_repeated())
+            continue;
+
         auto path = c->path_in_schema()->ToDotVector();
         if (path.size() != 1)
             continue; // compound types not supported

From 858f8b502002661584e6153d39a23edc87b49dda Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Sun, 18 Aug 2024 19:11:30 +0800
Subject: [PATCH 199/363] Add test and update code per review comments

---
 .../Formats/Impl/ParquetBlockInputFormat.cpp        |   2 +-
 .../02841_parquet_filter_pushdown_bug.reference     |   1 +
 .../02841_parquet_filter_pushdown_bug.sh.sh         |   8 ++++++++
 .../0_stateless/data_parquet/68131.parquet          | Bin 0 -> 289 bytes
 4 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference
 create mode 100755 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh
 create mode 100644 tests/queries/0_stateless/data_parquet/68131.parquet

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 7b5c29e321f..1f213fef731 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -286,7 +286,7 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa
         if (!s)
             continue;

-        if(s->descr()->schema_node()->is_repeated())
+        if (s->descr()->schema_node()->is_repeated())
             continue;

         auto path = c->path_in_schema()->ToDotVector();
diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference
new file mode 100644
index 
00000000000..6ed63af507a --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference @@ -0,0 +1 @@ +[1,2] diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh new file mode 100755 index 00000000000..58eb207b6e6 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select f from file('$CURDIR/data_parquet/68131.parquet', Parquet, 'f Array(Int32)')" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_parquet/68131.parquet b/tests/queries/0_stateless/data_parquet/68131.parquet new file mode 100644 index 0000000000000000000000000000000000000000..169f6152003db164c78e33cd69205caa33f906b5 GIT binary patch literal 289 zcmXAl!D_=W42Bgqgq#M0O4Q&(E)5xMp|QKBg Date: Sun, 18 Aug 2024 12:18:24 +0000 Subject: [PATCH 200/363] fix test --- .../0_stateless/03221_refreshable_matview_progress.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index de8de41fd04..ecb385c9bfa 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -3,7 +3,7 @@ set allow_experimental_refreshable_materialized_view=1; CREATE MATERIALIZED VIEW 03221_rmv -REFRESH AFTER 1 SECOND +REFRESH AFTER 10 SECOND ( x UInt64 ) @@ -16,3 +16,5 @@ SELECT rand64() AS x; SELECT sleep(2); SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; + +DROP TABLE 03221_rmv; From f5308635d193859cdb19a71040006278a21bdc51 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 18 Aug 2024 15:25:07 +0200 Subject: [PATCH 201/363] Revert "Improve compatibility of `upper/lowerUTF8` with Spark" --- .gitmodules | 7 +- contrib/icu | 2 +- src/Common/examples/CMakeLists.txt | 5 - src/Common/examples/utf8_upper_lower.cpp | 27 -- src/Functions/LowerUpperImpl.h | 1 + src/Functions/LowerUpperUTF8Impl.h | 283 +++++++++++++++--- src/Functions/initcapUTF8.cpp | 3 +- src/Functions/lowerUTF8.cpp | 25 +- src/Functions/upperUTF8.cpp | 24 +- .../00170_lower_upper_utf8.reference | 4 - .../0_stateless/00170_lower_upper_utf8.sql | 11 - .../00233_position_function_family.sql | 3 - .../0_stateless/00761_lower_utf8_bug.sql | 3 - .../0_stateless/01278_random_string_utf8.sql | 3 - .../0_stateless/01431_utf8_ubsan.reference | 4 +- .../queries/0_stateless/01431_utf8_ubsan.sql | 3 - .../0_stateless/01590_countSubstrings.sql | 3 - ...71_lower_upper_utf8_row_overlaps.reference | 4 +- .../02071_lower_upper_utf8_row_overlaps.sql | 3 - ...new_functions_must_be_documented.reference | 2 + .../02514_if_with_lazy_low_cardinality.sql | 3 - .../0_stateless/02807_lower_utf8_msan.sql | 3 - tests/queries/0_stateless/03015_peder1001.sql | 3 - 23 files changed, 265 insertions(+), 164 deletions(-) delete mode 100644 src/Common/examples/utf8_upper_lower.cpp diff --git a/.gitmodules b/.gitmodules index f18844e5eb4..cdee6a43ad8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,6 +106,9 @@ [submodule "contrib/icudata"] path = contrib/icudata url = https://github.com/ClickHouse/icudata +[submodule "contrib/icu"] + path 
= contrib/icu + url = https://github.com/unicode-org/icu [submodule "contrib/flatbuffers"] path = contrib/flatbuffers url = https://github.com/ClickHouse/flatbuffers @@ -366,7 +369,3 @@ [submodule "contrib/numactl"] path = contrib/numactl url = https://github.com/ClickHouse/numactl.git -[submodule "contrib/icu"] - path = contrib/icu - url = https://github.com/ClickHouse/icu - branch = ClickHouse/release-75-1 diff --git a/contrib/icu b/contrib/icu index 4216173eeeb..7750081bda4 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273 +Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 8383e80d09d..69580d4ad0e 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,8 +92,3 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) - -if (TARGET ch_contrib::icu) - clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) - target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) -endif () diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp deleted file mode 100644 index 826e1763105..00000000000 --- a/src/Common/examples/utf8_upper_lower.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -std::string utf8_to_lower(const std::string & input) -{ - icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); - unicodeInput.toLower(); - std::string output; - unicodeInput.toUTF8String(output); - return output; -} - -std::string utf8_to_upper(const std::string & input) -{ - icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); - unicodeInput.toUpper(); - std::string output; - unicodeInput.toUTF8String(output); - return output; -} - -int main() -{ - std::string input = "ır"; - std::cout << "upper:" << utf8_to_upper(input) << std::endl; - return 0; -} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index a52703d10c8..d463ef96e16 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,6 +1,7 @@ #pragma once #include + namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 5da085f48e5..eedabca5b22 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,14 +1,15 @@ #pragma once - -#include "config.h" - -#if USE_ICU - #include #include -#include -#include +#include +#include #include +#include + +#ifdef __SSE2__ +#include +#endif + namespace DB { @@ -18,7 +19,71 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template +/// xor or do nothing +template +UInt8 xor_or_identity(const UInt8 c, const int mask) +{ + return c ^ mask; +} + +template <> +inline UInt8 xor_or_identity(const UInt8 c, const int) +{ + return c; +} + +/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array +template +inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +{ + if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// ѐёђѓєѕіїјљњћќѝўџ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD0u && 
(src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// А-П + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) + { + /// а-п + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) + { + /// Р-Я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// р-я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } +} + + +/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. + * Note: It is assumed that after the character is converted to another case, + * the length of its multibyte sequence in UTF-8 does not change. + * Otherwise, the behavior is undefined. + */ +template struct LowerUpperUTF8Impl { static void vector( @@ -38,46 +103,180 @@ struct LowerUpperUTF8Impl return; } - res_data.resize(data.size()); - res_offsets.resize_exact(offsets.size()); - - String output; - size_t curr_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); - size_t size = offsets[i] - offsets[i - 1]; - - icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); - if constexpr (upper) - input.toUpper(); - else - input.toLower(); - - output.clear(); - input.toUTF8String(output); - - /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first - /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this - /// case, the behavior is also reasonable. - const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); - size_t valid_size = res_end ? res_end - output.data() + 1 : 0; - - res_data.resize(curr_offset + valid_size + 1); - memcpy(&res_data[curr_offset], output.data(), valid_size); - res_data[curr_offset + valid_size] = 0; - - curr_offset += valid_size + 1; - res_offsets[i] = curr_offset; - } + res_data.resize_exact(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), offsets, res_data.data()); } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } + + /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. + * `src` and `dst` are incremented by corresponding sequence lengths. 
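+     * An illustrative example (assuming the lower-casing instantiation's parameters):
+     * for ASCII input 'A' the fast path below computes 'A' ^ flip_case_mask,
+     * i.e. 'A' ^ 0x20 == 'a', and advances both pointers by exactly one byte.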
*/ + static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) + { + if (src[0] <= ascii_upper_bound) + { + if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) + *dst++ = *src++ ^ flip_case_mask; + else + *dst++ = *src++; + } + else if (src + 1 < src_end + && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) + { + cyrillic_to_case(src, dst); + } + else if (src + 1 < src_end && src[0] == 0xC2u) + { + /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF + *dst++ = *src++; + *dst++ = *src++; + } + else if (src + 2 < src_end && src[0] == 0xE2u) + { + /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + else + { + size_t src_sequence_length = UTF8::seqLength(*src); + /// In case partial buffer was passed (due to SSE optimization) + /// we cannot convert it with current src_end, but we may have more + /// bytes to convert and eventually got correct symbol. + if (partial && src_sequence_length > static_cast(src_end - src)) + return false; + + auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); + if (src_code_point) + { + int dst_code_point = to_case(*src_code_point); + if (dst_code_point > 0) + { + size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); + assert(dst_sequence_length <= 4); + + /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. + /// As an example, this happens for ß and ẞ. + if (dst_sequence_length == src_sequence_length) + { + src += dst_sequence_length; + dst += dst_sequence_length; + return true; + } + } + } + + *dst = *src; + ++dst; + ++src; + } + + return true; + } + +private: + static constexpr auto ascii_upper_bound = '\x7f'; + static constexpr auto flip_case_mask = 'A' ^ 'a'; + + static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) + { + const auto * offset_it = offsets.begin(); + const UInt8 * begin = src; + +#ifdef __SSE2__ + static constexpr auto bytes_sse = sizeof(__m128i); + + /// If we are before this position, we can still read at least bytes_sse. 
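+    /// Worked example (illustrative): with bytes_sse == 16 and a 40-byte chunk,
+    /// src_end_sse == begin + 25, so 16-byte loads land at offsets 0 and 16 only,
+    /// and the tail past offset 31 is finished by the scalar per-row loop below.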
+ const auto * src_end_sse = src_end - bytes_sse + 1; + + /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) + const auto v_zero = _mm_setzero_si128(); + const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); + const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); + const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); + + while (src < src_end_sse) + { + const auto chars = _mm_loadu_si128(reinterpret_cast(src)); + + /// check for ASCII + const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); + const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); + + /// ASCII + if (mask_is_not_ascii == 0) + { + const auto is_not_case + = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); + const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); + + /// everything in correct case ASCII + if (mask_is_not_case == 0) + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); + else + { + /// ASCII in mixed case + /// keep `flip_case_mask` only where necessary, zero out elsewhere + const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); + + /// flip case by applying calculated mask + const auto cased_chars = _mm_xor_si128(chars, xor_mask); + + /// store result back to destination + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); + } + + src += bytes_sse; + dst += bytes_sse; + } + else + { + /// UTF-8 + + /// Find the offset of the next string after src + size_t offset_from_begin = src - begin; + while (offset_from_begin >= *offset_it) + ++offset_it; + + /// Do not allow one row influence another (since row may have invalid sequence, and break the next) + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + const UInt8 * expected_end = std::min(src + bytes_sse, row_end); + + while (src < expected_end) + { + if (!toCase(src, expected_end, dst, /* partial= */ true)) + { + /// Fallback to handling byte by byte. 
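+                        /// (Assumed reading: a multi-byte sequence straddles the 16-byte
+                        /// window here, so shrinking src_end_sse to src ends the SSE loop
+                        /// and the scalar per-row loop at the end finishes the conversion.)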
+ src_end_sse = src; + break; + } + } + } + } + + /// Find the offset of the next string after src + size_t offset_from_begin = src - begin; + while (offset_it != offsets.end() && offset_from_begin >= *offset_it) + ++offset_it; +#endif + + /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) + while (src < src_end) + { + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + + while (src < row_end) + toCase(src, row_end, dst, /* partial= */ false); + ++offset_it; + } + } }; } - -#endif diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 004586dce26..282d846094e 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -1,8 +1,9 @@ #include #include +#include #include #include -#include + namespace DB { diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index e2f7cb84730..7adb0069121 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -1,10 +1,9 @@ -#include "config.h" - -#if USE_ICU - -#include +#include #include #include +#include +#include + namespace DB { @@ -16,25 +15,13 @@ struct NameLowerUTF8 static constexpr auto name = "lowerUTF8"; }; -using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>; +using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; } REGISTER_FUNCTION(LowerUTF8) { - FunctionDocumentation::Description description - = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; - FunctionDocumentation::Syntax syntax = "lowerUTF8(input)"; - FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; - FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; - FunctionDocumentation::Examples examples = { - {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"}, - }; - FunctionDocumentation::Categories categories = {"String"}; - - factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction(); } } - -#endif diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp index ef26430331f..659e67f0ef3 100644 --- a/src/Functions/upperUTF8.cpp +++ b/src/Functions/upperUTF8.cpp @@ -1,10 +1,8 @@ -#include "config.h" - -#if USE_ICU - -#include #include #include +#include +#include + namespace DB { @@ -16,25 +14,13 @@ struct NameUpperUTF8 static constexpr auto name = "upperUTF8"; }; -using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>; +using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>; } REGISTER_FUNCTION(UpperUTF8) { - FunctionDocumentation::Description description - = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. 
If this assumption is violated, no exception is thrown and the result is undefined.)"; - FunctionDocumentation::Syntax syntax = "upperUTF8(input)"; - FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; - FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; - FunctionDocumentation::Examples examples = { - {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"}, - }; - FunctionDocumentation::Categories categories = {"String"}; - - factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction(); } } - -#endif diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index 3c644f22b9b..f202cb75513 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -22,7 +22,3 @@ 1 1 1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 85b6c5c6095..4caba2033ff 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; @@ -30,11 +27,3 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n; select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; - --- Turkish language -select upperUTF8('ır') = 'IR'; -select lowerUTF8('ır') = 'ır'; - --- German language -select upper('öäüß') = 'öäüß'; -select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql index d6668cb7ba4..dd7394bc39a 100644 --- a/tests/queries/0_stateless/00233_position_function_family.sql +++ b/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SET send_logs_level = 'fatal'; select 1 = position('', ''); diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql index a0ab55edc15..de20b894331 100644 --- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql +++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql @@ -1,4 +1 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0'); diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index 290d6a0c759..da2dc48c3e1 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 
lengthUTF8(randomStringUTF8(100)); SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index dc785e57851..c98c950d535 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -EFBFBD -EFBFBD +FF +FF diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index 3a28e023805..d6a299225b1 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,5 +1,2 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index 5ec4f412d7f..b38cbb7d188 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index deabef61a88..a3bac432482 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"�","�" +1,"0xE2","0xE2" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"�","�" +1,"0xE2","0xE2" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index d175e0659d0..8ca0a3f5f75 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0980e25b70f..c39f1fb1ce9 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,6 +416,7 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower +lowerUTF8 makeDate makeDate32 makeDateTime @@ -896,6 +897,7 @@ tupleToNameValuePairs unbin unhex upper +upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index b169cfd0ab9..80e3c0a9ece 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - 
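[Editor's note] The 01431_utf8_ubsan reference change above (EFBFBD becomes FF) captures the behavioral difference this series accepts: the ICU-backed implementation substitutes an undecodable byte with U+FFFD, while the restored implementation copies it through untouched. A rough Python model of the two policies, purely for illustration (neither function is the server code):

```python
def lower_utf8_icu_style(data: bytes) -> bytes:
    # ICU-like policy: an undecodable byte such as 0xFF is replaced with
    # U+FFFD, which re-encodes to the bytes EF BF BD.
    return data.decode("utf-8", errors="replace").lower().encode("utf-8")

def lower_utf8_passthrough(data: bytes) -> bytes:
    # Restored policy: case-map only valid sequences and copy invalid
    # bytes through as-is, so hex(lowerUTF8('\xFF')) stays FF.
    out = bytearray()
    i = 0
    while i < len(data):
        if data[i] < 0x80:  # ASCII fast path
            out += bytes([data[i]]).lower()
            i += 1
            continue
        for width in (2, 3, 4):  # try to decode one multi-byte sequence
            try:
                out += data[i:i + width].decode("utf-8").lower().encode("utf-8")
                i += width
                break
            except UnicodeDecodeError:
                continue
        else:
            out.append(data[i])  # invalid byte: pass through unchanged
            i += 1
    return bytes(out)

assert lower_utf8_icu_style(b"\xff").hex().upper() == "EFBFBD"
assert lower_utf8_passthrough(b"\xff").hex().upper() == "FF"
```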
create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql index 95f224577f7..e9eb18bf615 100644 --- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql +++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql @@ -1,5 +1,2 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1; SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2); diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql index df8e4db1536..810503207f2 100644 --- a/tests/queries/0_stateless/03015_peder1001.sql +++ b/tests/queries/0_stateless/03015_peder1001.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - DROP TABLE IF EXISTS test_data; CREATE TABLE test_data From 427016a450cad536e8cbaf4de04d07313456aa4b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 6 Aug 2024 21:39:41 +0200 Subject: [PATCH 202/363] CI: Functional tests to store artifacts on timeout --- docker/test/fasttest/run.sh | 18 +--------- docker/test/sqllogic/Dockerfile | 1 - docker/test/sqllogic/run.sh | 2 +- docker/test/sqltest/Dockerfile | 1 - docker/test/stateful/run.sh | 25 +------------- docker/test/stateless/Dockerfile | 1 - docker/test/stateless/run.sh | 21 ++---------- docker/test/stateless/utils.lib | 16 --------- tests/ci/ci.py | 30 ++++++++-------- tests/ci/ci_definitions.py | 3 +- tests/ci/functional_test_check.py | 37 +++++++++++++++----- tests/ci/report.py | 11 +++--- tests/ci/tee_popen.py | 57 +++++++++++++++++++++++++++---- 13 files changed, 107 insertions(+), 116 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 394d31addb1..9920326b11c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -256,22 +256,6 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." 
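[Editor's note] The `timeout_with_logging` helper being deleted here (and again in the stateful/stateless scripts and utils.lib below) wrapped commands in GNU `timeout` and keyed off exit status 124. A rough Python equivalent of that old pattern, for reference only; note that because the helper also passed `--preserve-status`, a SIGTERM-ed command normally reports 128+15=143 rather than 124, so the "is it a timeout?" branch appears unlikely to have ever fired, and the series replaces the whole pattern with an externally driven SIGTERM anyway:

```python
# Sketch of the deleted bash helper; assumes GNU timeout is on PATH.
import subprocess

def run_with_timeout_logging(cmd: str, timeout_s: int) -> int:
    proc = subprocess.run(
        ["timeout", "-s", "TERM", "--preserve-status", str(timeout_s),
         "bash", "-c", cmd],
        check=False,
    )
    if proc.returncode == 124:  # GNU timeout's own "deadline fired" status
        print(f"The command 'timeout {timeout_s} {cmd}' was killed by timeout.")
    else:
        print("No, it isn't a timeout.")
    return proc.returncode
```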
- fi - - return $exit_code -} - function run_tests { clickhouse-server --version @@ -340,7 +324,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - timeout_with_logging 35m bash -c run_tests ||: + run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1425e12cd84..6397526388e 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index ccba344035e..32368980f9b 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -94,7 +94,7 @@ function run_tests() export -f run_tests -timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 +run_tests #/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 71d915b0c7a..b805bb03c2b 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/" RUN git clone ${sqltest_repo} ENV TZ=UTC -ENV MAX_RUN_TIME=900 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY run.sh / diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 3a4f0d97993..c072eeb0fa8 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -4,9 +4,6 @@ source /setup_export_logs.sh set -e -x -MAX_RUN_TIME=${MAX_RUN_TIME:-3600} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME)) - # Choose random timezone for this test run TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" @@ -123,9 +120,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] clickhouse-client --query "DROP TABLE datasets.hits_v1" clickhouse-client --query "DROP TABLE datasets.visits_v1" - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" @@ -257,24 +251,7 @@ function run_tests() export -f run_tests -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." 
- fi - - return $exit_code -} - -TIMEOUT=$((MAX_RUN_TIME - 700)) -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index d8eb072328f..b0c4914a4e8 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 -ENV MAX_RUN_TIME=0 # Unrelated to vars in setup_minio.sh, but should be the same there # to have the same binaries for local running scenario diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..ad0cd321cc5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,9 +12,6 @@ dmesg --clear # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-9000} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME)) - USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -308,8 +305,6 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" - TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) - START_TIME=${SECONDS} set +e TEST_ARGS=( @@ -324,32 +319,22 @@ function run_tests() --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" ) - timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ + clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e - DURATION=$((SECONDS - START_TIME)) - - echo "Elapsed ${DURATION} seconds." - if [[ $DURATION -ge $TIMEOUT ]] - then - echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." - fi } export -f run_tests - -# This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \ | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index cb257536c36..31cd67254b4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -40,22 +40,6 @@ function fn_exists() { declare -F "$1" > /dev/null; } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function collect_core_dumps() { find . 
-type f -maxdepth 1 -name 'core.*' | while read -r core; do diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 49b597333dc..1208d8642ad 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -50,7 +50,6 @@ from github_helper import GitHub from pr_info import PRInfo from report import ( ERROR, - FAILURE, PENDING, SUCCESS, BuildResult, @@ -62,11 +61,11 @@ from report import ( FAIL, ) from s3_helper import S3Helper -from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings from ci_buddy import CIBuddy +from stopwatch import Stopwatch from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -370,8 +369,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # skip_status = SUCCESS already there GH.print_in_group("Commit Status Data", job_status) - # create pre report - jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) + # create dummy report + jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped) jr.dump() if not to_be_skipped: @@ -990,19 +989,20 @@ def _run_test(job_name: str, run_command: str) -> int: stopwatch = Stopwatch() job_log = Path(TEMP_PATH) / "job_log.txt" with TeePopen(run_command, job_log, env, timeout) as process: + print(f"Job process started, pid [{process.process.pid}]") retcode = process.wait() if retcode != 0: print(f"Run action failed for: [{job_name}] with exit code [{retcode}]") - if timeout and process.timeout_exceeded: - print(f"Timeout {timeout} exceeded, dumping the job report") - JobReport( - status=FAILURE, - description=f"Timeout {timeout} exceeded", - test_results=[TestResult.create_check_timeout_expired(timeout)], - start_time=stopwatch.start_time_str, - duration=stopwatch.duration_seconds, - additional_files=[job_log], - ).dump() + if process.timeout_exceeded: + print(f"Job timed out: [{job_name}] exit code [{retcode}]") + assert JobReport.exist(), "JobReport real or dummy must be present" + jr = JobReport.load() + if jr.dummy: + print( + f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time" + ) + jr.test_results = [TestResult.create_check_timeout_expired()] + jr.duration = stopwatch.duration_seconds print(f"Run action done for: [{job_name}]") return retcode @@ -1205,7 +1205,7 @@ def main() -> int: job_report ), "BUG. 
There must be job report either real report, or pre-report if job was killed" error_description = "" - if not job_report.pre_report: + if not job_report.dummy: # it's a real job report ch_helper = ClickHouseHelper() check_url = "" diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1bed9db06f2..1d1c39f913d 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -332,7 +332,7 @@ class JobConfig: # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds - timeout: Optional[int] = None + timeout: int = 7200 # sets number of batches for a multi-batch job num_batches: int = 1 # label that enables job in CI, if set digest isn't used @@ -421,7 +421,6 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index b7391eff01b..d08f98fa05f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -5,10 +5,11 @@ import csv import logging import os import re +import signal import subprocess import sys from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials @@ -25,11 +26,12 @@ from report import ( TestResults, read_test_results, FAILURE, + TestResult, ) from stopwatch import Stopwatch from tee_popen import TeePopen from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell NO_CHANGES_MSG = "Nothing to run" @@ -113,10 +115,6 @@ def get_run_command( if flaky_check: envs.append("-e NUM_TRIES=50") - envs.append("-e MAX_RUN_TIME=2800") - else: - max_run_time = os.getenv("MAX_RUN_TIME", "0") - envs.append(f"-e MAX_RUN_TIME={max_run_time}") envs += [f"-e {e}" for e in additional_envs] @@ -128,7 +126,7 @@ def get_run_command( ) return ( - f"docker run --volume={builds_path}:/package_folder " + f"docker run --rm --name func-tester --volume={builds_path}:/package_folder " # For dmesg and sysctl "--privileged " f"{ci_logs_args}" @@ -198,7 +196,7 @@ def process_results( state, description = status[0][0], status[0][1] if ret_code != 0: state = ERROR - description += " (but script exited with an error)" + description = f"Job failed, exit code: {ret_code}. 
" + description try: results_path = result_directory / "test_results.tsv" @@ -240,7 +238,19 @@ def parse_args(): return parser.parse_args() +test_process = None # type: Optional[TeePopen] +timeout_expired = False + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True) + + def main(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -328,11 +338,13 @@ def main(): logging.info("Going to run func tests: %s", run_command) with TeePopen(run_command, run_log_path) as process: + global test_process + test_process = process retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: - logging.info("Run failed") + logging.info("Run failed, exit code %s", retcode) try: subprocess.check_call( @@ -348,6 +360,13 @@ def main(): state, description, test_results, additional_logs = process_results( retcode, result_path, server_log_path ) + if timeout_expired: + description = "Timeout expired" + state = FAILURE + test_results.insert( + 0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds) + ) + else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/report.py b/tests/ci/report.py index 6779a6dae96..c2632719aef 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" JOB_STARTED_TEST_NAME = "STARTED" JOB_FINISHED_TEST_NAME = "COMPLETED" +JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired" @dataclass @@ -277,8 +278,8 @@ class TestResult: self.log_files.append(log_path) @staticmethod - def create_check_timeout_expired(timeout: float) -> "TestResult": - return TestResult("Check timeout expired", "FAIL", timeout) + def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult": + return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration) TestResults = List[TestResult] @@ -303,7 +304,7 @@ class JobReport: # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False # indicates that report generated by CI script in order to check later if job was killed before real report is generated - pre_report: bool = False + dummy: bool = False exit_code: int = -1 @staticmethod @@ -311,7 +312,7 @@ class JobReport: return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") @classmethod - def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": + def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( status=status, description="", @@ -320,7 +321,7 @@ class JobReport: duration=0.0, additional_files=[], job_skipped=job_skipped, - pre_report=True, + dummy=True, ) def update_duration(self): diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 13db50df53f..ad3e62dab9c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -2,6 +2,8 @@ import logging import os +import signal +import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -30,20 +32,35 @@ class TeePopen: self._process = None # type: Optional[Popen] self.timeout = timeout self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False + self.pid = 0 def _check_timeout(self) -> None: if 
self.timeout is None: return sleep(self.timeout) + logging.warning( + "Timeout exceeded. Send SIGTERM to process %s, timeout %s", + self.process.pid, + self.timeout, + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + sleep(wait) + time_wait += wait while self.process.poll() is None: - logging.warning( - "Killing process %s, timeout %s exceeded", - self.process.pid, - self.timeout, + logging.error( + "Process is still running. Send SIGKILL", ) - os.killpg(self.process.pid, 9) - sleep(10) + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + sleep(5) def __enter__(self) -> "TeePopen": self.process = Popen( @@ -57,6 +74,9 @@ class TeePopen: bufsize=1, errors="backslashreplace", ) + sleep(1) + self.pid = self._get_child_pid() + print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -77,6 +97,22 @@ class TeePopen: self.log_file.close() + def _get_child_pid(self): + # linux only + ps_command = f"ps --ppid {self.process.pid} -o pid=" + res = "NA" + try: + result = subprocess.run( + ps_command, shell=True, capture_output=True, text=True + ) + res = result.stdout.strip() + pid = int(res) + return pid + except Exception as e: + print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") + print(f"ERROR: getting Python subprocess PID: {e}") + return self.process.pid + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -85,6 +121,15 @@ class TeePopen: return self.process.wait() + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + if self.pid: + os.kill(self.pid, signal_num) + else: + print("ERROR: no process to send signal") + @property def process(self) -> Popen: if self._process is not None: From 8e35b082b2e3315655110bdce4238217dfe85914 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 10 Aug 2024 10:01:16 +0200 Subject: [PATCH 203/363] teepopen fix --- tests/ci/tee_popen.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index ad3e62dab9c..53b0a0f6c2c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -3,7 +3,6 @@ import logging import os import signal -import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -34,7 +33,6 @@ class TeePopen: self.timeout_exceeded = False self.terminated_by_sigterm = False self.terminated_by_sigkill = False - self.pid = 0 def _check_timeout(self) -> None: if self.timeout is None: @@ -75,8 +73,7 @@ class TeePopen: errors="backslashreplace", ) sleep(1) - self.pid = self._get_child_pid() - print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") + print(f"Subprocess started, pid [{self.process.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -97,22 +94,6 @@ class TeePopen: self.log_file.close() - def _get_child_pid(self): - # linux only - ps_command = f"ps --ppid {self.process.pid} -o pid=" - res = "NA" - try: - result = subprocess.run( - ps_command, shell=True, capture_output=True, text=True - ) - res = result.stdout.strip() - pid = int(res) - return pid - except Exception 
as e: - print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") - print(f"ERROR: getting Python subprocess PID: {e}") - return self.process.pid - def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -125,10 +106,7 @@ class TeePopen: return self.process.poll() def send_signal(self, signal_num): - if self.pid: - os.kill(self.pid, signal_num) - else: - print("ERROR: no process to send signal") + os.killpg(self.process.pid, signal_num) @property def process(self) -> Popen: From 66fa5a154a8895f481a598616df93f7cb83e42cd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 12 Aug 2024 02:34:22 +0200 Subject: [PATCH 204/363] tune timeouts, batches --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 173c6c9c931..99f4ed38475 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,6 +316,7 @@ class CI: JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], random_bucket="parrepl_with_sanitizer", + timeout=3600, ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 @@ -346,7 +347,7 @@ class CI: required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True @@ -401,14 +402,14 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=6, + num_batches=3, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1d1c39f913d..13c222b10b9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -465,6 +465,7 @@ class CommonJobConfigs: ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, + timeout=3600, ) INTEGRATION_TEST = JobConfig( job_name_keyword="integration", From 1deeca40dbbbc14373e51d830b851b54b82e5efa Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 15 Aug 2024 13:11:10 +0200 Subject: [PATCH 205/363] Handling timeout in integration tests --- tests/ci/ci.py | 13 ++++++++- tests/ci/ci_config.py | 3 ++- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 40 +++++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1208d8642ad..a9ae078b449 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1003,6 +1003,7 @@ def _run_test(job_name: str, run_command: str) -> int: ) 
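[Editor's note] A condensed model of the escalation `_check_timeout` implements above: SIGTERM first so the child can wind down and write its report, SIGKILL only if it is still alive after the grace period. This is an illustration, not the TeePopen API; like the real `send_signal`, it assumes the child leads its own process group so `os.killpg` reaches the whole tree:

```python
import os
import signal
import subprocess
import time

def terminate_with_grace(proc: subprocess.Popen, grace_s: int = 100) -> None:
    # Ask politely: the child gets a chance to flush logs and reports.
    os.killpg(proc.pid, signal.SIGTERM)
    waited = 0
    while proc.poll() is None and waited < grace_s:
        time.sleep(5)
        waited += 5
    # Still running after the grace period: escalate to SIGKILL.
    while proc.poll() is None:
        os.killpg(proc.pid, signal.SIGKILL)
        time.sleep(5)
```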
jr.test_results = [TestResult.create_check_timeout_expired()] jr.duration = stopwatch.duration_seconds + jr.additional_files += [job_log] print(f"Run action done for: [{job_name}]") return retcode @@ -1329,10 +1330,20 @@ def main() -> int: if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + check_url = "" + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or _get_ext_check_name(args.job_name), + ) post_commit_status( commit, ERROR, - "", + check_url, "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 99f4ed38475..b5e424c2b3f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -402,7 +402,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + num_batches=3, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 6245f0490fc..7232ca375a1 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f5dbef4f6db..d3cd3d16de1 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -9,6 +9,7 @@ import random import re import shlex import shutil +import signal import string import subprocess import sys @@ -16,11 +17,13 @@ import time import zlib # for crc32 from collections import defaultdict from itertools import chain -from typing import Any, Dict +from typing import Any, Dict, Optional from env_helper import IS_CI from integration_test_images import IMAGES from tee_popen import TeePopen +from report import JOB_TIMEOUT_TEST_NAME +from stopwatch import Stopwatch MAX_RETRY = 1 NUM_WORKERS = 5 @@ -621,6 +624,9 @@ class ClickhouseIntegrationTestsRunner: test_data_dirs = {} for i in range(num_tries): + if timeout_expired: + print("Timeout expired - break test group execution") + break logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() @@ -657,6 +663,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + global runner_subprocess + runner_subprocess = proc proc.wait() extra_logs_names = [log_basename] @@ -780,6 +788,9 @@ class ClickhouseIntegrationTestsRunner: logs = [] tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): + if timeout_expired: + print("Timeout expired - break flaky check execution") + break final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( @@ -839,6 
+850,7 @@ class ClickhouseIntegrationTestsRunner: return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): + stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: return self.run_flaky_check( repo_path, build_path, should_fail=self.bugfix_validate_check @@ -921,6 +933,9 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: + if timeout_expired: + print("Timeout expired - break tests execution") + break logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 @@ -981,6 +996,17 @@ class ClickhouseIntegrationTestsRunner: status_text = "Timeout, " + status_text result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -1001,6 +1027,7 @@ def write_results(results_file, status_file, results, status): def run(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH") @@ -1035,5 +1062,16 @@ def run(): logging.info("Result written") +timeout_expired = False +runner_subprocess = None # type:Optional[subprocess.Popen] + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + runner_subprocess.send_signal(signal.SIGTERM) + + if __name__ == "__main__": run() From dde7ee29fc594f87bb35880bede845c4d4f29423 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 10:22:12 +0200 Subject: [PATCH 206/363] sort tests in report by status --- tests/ci/ci_config.py | 8 ++++---- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 3 ++- tests/ci/report.py | 19 +++++++++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b5e424c2b3f..8ce0b9fde5a 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -344,7 +344,7 @@ class CI: runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 @@ -354,7 +354,7 @@ class CI: ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=4, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -403,14 +403,14 @@ class CI: ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], - num_batches=3, + num_batches=6, ), 
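[Editor's note] The runner-side half of the handshake added above, condensed into a minimal sketch (names mirror the patch, the loop body is a stand-in): an external SIGTERM flips a module-level flag and is forwarded to the running pytest subprocess, and the main loops poll the flag between groups and retries so they stop scheduling new work instead of being cut off mid-flight:

```python
import signal
import subprocess
from typing import Optional

timeout_expired = False
runner_subprocess: Optional[subprocess.Popen] = None

def handle_sigterm(signum, _frame):
    global timeout_expired
    timeout_expired = True
    if runner_subprocess is not None:
        runner_subprocess.send_signal(signal.SIGTERM)

signal.signal(signal.SIGTERM, handle_sigterm)

for group in []:  # stand-in for the real test-group loop
    if timeout_expired:
        break  # stop between groups; a mock FAIL row then records the timeout
```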
JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=3, + num_batches=6, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7232ca375a1..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils, Shell +from ci_utils import Utils def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index d3cd3d16de1..c3b71b85022 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -1070,7 +1070,8 @@ def handle_sigterm(signum, _frame): print(f"WARNING: Received signal {signum}") global timeout_expired timeout_expired = True - runner_subprocess.send_signal(signal.SIGTERM) + if runner_subprocess: + runner_subprocess.send_signal(signal.SIGTERM) if __name__ == "__main__": diff --git a/tests/ci/report.py b/tests/ci/report.py index c2632719aef..a1b25b994c7 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -742,10 +742,21 @@ def create_test_html_report( has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False - # Display entires with logs at the top (they correspond to failed tests) - test_results.sort( - key=lambda result: result.raw_logs is None and result.log_files is None - ) + def sort_key(status): + if "fail" in status.lower(): + return 0 + elif "error" in status.lower(): + return 1 + elif "not" in status.lower(): + return 2 + elif "ok" in status.lower(): + return 10 + elif "success" in status.lower(): + return 9 + else: + return 5 + + test_results.sort(key=lambda result: sort_key(result.status)) for test_result in test_results: colspan = 0 From 29fd5a6c90caeafee8bc930918f7f5544a1658a2 Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Sun, 18 Aug 2024 15:10:35 +0000 Subject: [PATCH 207/363] Add explicit session_timezone to UTC --- tests/queries/0_stateless/03222_datetime64_small_value_const.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index af06a622f8d..39266ba7992 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -1,4 +1,5 @@ -- Tags: shard +set session_timezone = 'UTC'; -- don't randomize the session timezone select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; From 90330077e5595c000cec82c8a0819db339296d33 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Aug 2024 17:56:44 +0000 Subject: [PATCH 208/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 
ecb385c9bfa..98e1c48478d 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-replicated-database, no-ordinary-database set allow_experimental_refreshable_materialized_view=1; From 683b84e6b66edbfbba12a3c56aad9aefd717bde2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 19:37:52 +0100 Subject: [PATCH 209/363] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 4 ++++ src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index cc321cd5a4d..9927d369104 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -235,6 +235,10 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) const auto part_idx = current_parts.back().part_idx; const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + /// Do not get too few rows from part. if (marks_in_part >= min_marks_per_task && need_marks < min_marks_per_task) need_marks = min_marks_per_task; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 9d3c38822e1..1cc13102794 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -87,10 +87,6 @@ static size_t calculateMinMarksPerTask( } } - if (min_marks_per_task == 0) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); - LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } From 3883627aad0e23105ec9e1f985039cd27dcf227d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 20:51:40 +0100 Subject: [PATCH 210/363] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 1 + src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 9927d369104..23c314e48f5 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes { extern const int CANNOT_SCHEDULE_TASK; extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; } MergeTreeReadPool::MergeTreeReadPool( diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 1cc13102794..95a10454f9e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( From a258b4fb3dcdb8fa2484132aa72e9cece618d488 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:18:31 +0200 Subject: [PATCH 211/363] Fix race condition in MergeTree restarting thread --- .../ReplicatedMergeTreeRestartingThread.cpp | 15 ++++++++++++++- 
.../ReplicatedMergeTreeRestartingThread.h | 11 ++--------- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++------ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 05fd6f6915b..d3ccda904b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -49,6 +48,20 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); } +void ReplicatedMergeTreeRestartingThread::start(bool schedule) +{ + LOG_TRACE(log, "Starting the restating thread, schedule: {}", schedule); + if (schedule) + task->activateAndSchedule(); + else + task->activate(); +} + +void ReplicatedMergeTreeRestartingThread::wakeup() +{ + task->schedule(); +} + void ReplicatedMergeTreeRestartingThread::run() { if (need_stop) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 01071d80e8b..d719505ae5e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -24,16 +24,9 @@ class ReplicatedMergeTreeRestartingThread public: explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - void start(bool schedule = true) - { - LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); - if (schedule) - task->activateAndSchedule(); - else - task->activate(); - } + void start(bool schedule); - void wakeup() { task->schedule(); } + void wakeup(); void shutdown(bool part_of_full_shutdown); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5194,17 +5194,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) startBeingLeader(); - /// Activate replica in a separate thread if we are not calling from attach thread - restarting_thread.start(/*schedule=*/!from_attach_thread); - if (from_attach_thread) { LOG_TRACE(log, "Trying to startup table from right now"); - /// Try activating replica in current thread. + /// Try activating replica in the current thread. restarting_thread.run(); + restarting_thread.start(false); } else { + restarting_thread.start(true); /// Wait while restarting_thread finishing initialization. /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. @@ -5225,7 +5224,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { LOG_TEST(log, "Received event for expired session. 
Waking up restarting thread"); - restarting_thread.start(); + restarting_thread.start(true); }); startBackgroundMovesIfNeeded(); @@ -5294,7 +5293,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() LOG_TRACE(log, "The attach thread is shutdown"); } - restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); From 3e5d070b8fb47600979a1a7cc672edc3a7327004 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:40:42 +0200 Subject: [PATCH 212/363] Fix tests --- .../00804_test_deflate_qpl_codec_compression.reference | 2 +- .../00804_test_zstd_qat_codec_compression.reference | 2 +- .../03227_print_pretty_tuples_create_query.reference | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index a2178f5eda7..a6e03404f2b 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference index 31a4360469f..ff70403ce7a 100644 --- a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference index c65dc32a224..afaaaaa6119 100644 --- a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -1,6 +1,6 @@ SHOW CREATE TABLE: -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple( a String, @@ -13,7 +13,7 @@ CREATE TABLE test.test ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192 -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String From 8f2c20806a7b757beecb99e52a8d5a1dcab8df07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:45:13 +0200 Subject: [PATCH 213/363] Fix test `01079_bad_alters_zookeeper_long` --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..66dac0dfe31 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6342,7 +6342,7 @@ void StorageReplicatedMergeTree::alter( "Metadata on replica is not up to date with common metadata in Zookeeper. " "It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). " - "You can retry this error"); + "You can retry the query"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
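[Editor's note] An illustrative model (threading semantics only, not the C++ classes) of the ordering the restarting-thread patch above enforces: when startup happens on the attach thread, the replica-activation logic runs once inline, and the background task is activated only afterwards, so a concurrently scheduled run can no longer race with the manual first run:

```python
import threading

class RestartingTask:
    def __init__(self, fn):
        self._fn = fn

    def start(self, schedule: bool) -> None:
        # "Activate" the task; optionally kick off a first scheduled run.
        if schedule:
            threading.Thread(target=self._fn, daemon=True).start()

def startup(task: RestartingTask, run_once, from_attach_thread: bool) -> None:
    if from_attach_thread:
        run_once()         # first activation happens inline...
        task.start(False)  # ...and only then is the task activated
    else:
        task.start(True)   # schedule immediately; callers wait elsewhere
```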
if (query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a3d7e0e922d..1203ad3730a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -59,6 +59,7 @@ MESSAGES_TO_RETRY = [ "is already started to be removing by another replica right now", # This is from LSan, and it indicates its own internal problem: "Unable to get registers from thread", + "You can retry", ] MAX_RETRIES = 3 From 207ef87782eca80410ed5747d971be7ddce65e6c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:55:38 +0200 Subject: [PATCH 214/363] Fix tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 3 ++- .../0_stateless/01825_type_json_ghdata_insert_select.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index fbd7d897fb8..b2f20d825dd 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 711194e71a1..17398d9a0c1 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 5ed3c29d4a1e1992671af7168c0a1b01757d97bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 01:18:12 +0200 Subject: [PATCH 215/363] Update tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- .../queries/0_stateless/01825_type_json_ghdata_insert_select.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index b2f20d825dd..33940caec29 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 17398d9a0c1..fc503b345d9 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 8eb922036e5b7caa36c1b904b43fdaee8e45acaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Aug 2024 12:45:15 +0800 Subject: [PATCH 216/363] change as request --- .../functions/string-replace-functions.md | 16 +- src/Functions/overlay.cpp | 140 +++++------ 
.../0_stateless/03205_overlay.reference | 230 +++++------------- tests/queries/0_stateless/03205_overlay.sql | 132 +++++----- .../0_stateless/03206_overlay_utf8.reference | 168 ------------- .../0_stateless/03206_overlay_utf8.sql | 60 ----- 6 files changed, 203 insertions(+), 543 deletions(-) delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index d086c9ee64b..408a896e607 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -251,24 +251,24 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); ## overlay -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. **Syntax** ```sql -overlay(s, replace, position[, length]) +overlay(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. **Example** @@ -286,26 +286,26 @@ Result: ## overlayUTF8 -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. **Syntax** ```sql -overlayUTF8(s, replace, position[, length]) +overlayUTF8(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. 
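[Editor's note] Since the offset/length rules described above are easy to misread, here is a small reference model of the documented byte-level `overlay` semantics (an illustration, not the server code; exact clamping at the string boundaries may differ, and `overlayUTF8` applies the same arithmetic to code points instead of bytes):

```python
def overlay(s: str, replace: str, offset: int, length: int | None = None) -> str:
    # 1-based offset; a negative offset counts from the back; length
    # defaults to len(replace) when omitted or negative.
    n = len(s)
    start = min(offset - 1, n) if offset > 0 else max(n + offset, 0)
    if length is None or length < 0:
        length = len(replace)
    return s[:start] + replace + s[start + length:]

assert overlay("ClickHouse SQL", "CORE", 12) == "ClickHouse CORE"
```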
**Example** diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 094da27a71d..73ca0acbb8e 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -17,13 +17,13 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -using namespace GatherUtils; - namespace { /// If 'is_utf8' - measure offset and length in code points instead of bytes. -/// Syntax: overlay(input, replace, offset[, length]) +/// Syntax: +/// - overlay(input, replace, offset[, length]) +/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes template class FunctionOverlay : public IFunction { @@ -37,63 +37,39 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - const size_t number_of_arguments = arguments.size(); - if (number_of_arguments < 3 || number_of_arguments > 4) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: " - "passed {}, should be 3 or 4", - getName(), - number_of_arguments); + FunctionArgumentDescriptors mandatory_args{ + {"input", static_cast(&isString), nullptr, "String"}, + {"replace", static_cast(&isString), nullptr, "String"}, + {"offset", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// first argument is string - if (!isString(arguments[0])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}, expected String", - arguments[0]->getName(), - getName()); + FunctionArgumentDescriptors optional_args{ + {"length", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// second argument is string - if (!isString(arguments[1])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected String", - arguments[1]->getName(), - getName()); - - if (!isNativeNumber(arguments[2])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64", - arguments[2]->getName(), - getName()); - - if (number_of_arguments == 4 && !isNativeNumber(arguments[3])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64", - arguments[3]->getName(), - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (input_rows_count == 0) + return ColumnString::create(); + const size_t number_of_arguments = arguments.size(); - bool three_args = number_of_arguments == 3; + bool has_three_args = number_of_arguments == 3; ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (!three_args) + if (!has_three_args) column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (!three_args) + if (!has_three_args) column_length_const = checkAndGetColumn(column_length.get()); bool 
offset_is_const = false; @@ -126,7 +102,7 @@ public: if (column_input_const) { StringRef input = column_input_const->getDataAt(0); - res_data.reserve(input.size * input_rows_count); + res_data.reserve((input.size + 1) * input_rows_count); } else { @@ -135,8 +111,8 @@ public: const auto * column_replace_const = checkAndGetColumn(column_replace.get()); const auto * column_replace_string = checkAndGetColumn(column_replace.get()); - bool input_is_const = column_input_const != nullptr; - bool replace_is_const = column_replace_const != nullptr; + bool input_is_const = (column_input_const != nullptr); + bool replace_is_const = (column_replace_const != nullptr); #define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ @@ -150,8 +126,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (input_is_const) \ + else if (input_is_const && !replace_is_const) \ constantVector( \ + input_rows_count, \ column_input_const->getDataAt(0), \ column_replace_string->getChars(), \ column_replace_string->getOffsets(), \ @@ -161,8 +138,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (replace_is_const) \ + else if (!input_is_const && replace_is_const) \ vectorConstant( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_const->getDataAt(0), \ @@ -174,6 +152,7 @@ public: res_offsets); \ else \ vectorVector( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_string->getChars(), \ @@ -185,7 +164,7 @@ public: res_data, \ res_offsets); - if (three_args) + if (has_three_args) { if (offset_is_const) { @@ -251,7 +230,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -263,7 +242,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -277,12 +256,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } @@ -300,7 +279,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -331,10 +310,10 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -363,8 +342,9 @@ private: } } - template + template void vectorConstant( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const StringRef & replace, @@ -375,27 +355,26 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorConstant( - input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -415,7 +394,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -449,9 +428,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -480,8 +459,9 @@ private: } } - template + template void constantVector( + size_t rows, const StringRef & input, const ColumnString::Chars & replace_data, const ColumnString::Offsets & replace_offsets, @@ -492,10 +472,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantVector( - input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -506,13 +486,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = replace_offsets.size(); const auto * input_begin = reinterpret_cast(input.data); const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative @@ -529,7 +508,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -564,9 +543,9 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -594,8 +573,9 @@ private: } } - template + template void vectorVector( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const ColumnString::Chars & replace_data, @@ -607,9 +587,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorVector( + rows, input_data, input_offsets, replace_data, @@ -625,13 +606,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -655,7 +635,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -693,9 +673,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 9e79db2e131..383a26986d6 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -1,168 +1,62 @@ -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL +Negative test of overlay +Positive test 1 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI 
SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 2 with various combinations of const/non-const columns +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Positive test 3 with various combinations of const/non-const columns +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Positive test 4 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 5 with various combinations of const/non-const columns +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index b131312c934..4fd0791521d 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,60 +1,74 @@ -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT 'Negative test of overlay'; +SELECT overlay('hello', 2); -- { serverError 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 456, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 'two', 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 2, 'three'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT overlay('Spark SQL', '_', 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); -SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); -SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 
materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); +SELECT 'Positive test 1 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 2 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); +SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); +SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); +SELECT overlay('Spark SQL', '_', materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('_'), materialize(6)); + +SELECT 'Positive test 3 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'CORE', 7), overlayUTF8('Spark SQL和CH', 'CORE', 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7); +SELECT overlay('Spark SQL', materialize('CORE'), 7), overlayUTF8('Spark SQL和CH', materialize('CORE'), 7); +SELECT overlay('Spark SQL', 'CORE', materialize(7)), overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)), overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)); + +SELECT 'Positive test 4 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 5 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'tructured', 2, 4), overlayUTF8('Spark SQL和CH', 'tructured', 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4); +SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 
materialize('tructured'), 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference deleted file mode 100644 index 19878c97184..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.reference +++ /dev/null @@ -1,168 +0,0 @@ -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Structured SQL和CH -Structured 
SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql deleted file mode 100644 index 00b756c8b5b..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.sql +++ /dev/null @@ -1,60 +0,0 @@ -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); -SELECT 
overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From 2a8c9b8518175d81632cf7ca48c10522b737e6b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 07:59:14 +0200 Subject: [PATCH 217/363] Fix tests --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 3 ++- tests/queries/0_stateless/01825_type_json_ghdata.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index 2afec5ba7fe..568ba2bd185 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long +# Tags: no-fasttest, no-s3-storage, long +# ^ no-s3-storage: it is memory-hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.sh b/tests/queries/0_stateless/01825_type_json_ghdata.sh index 2686e2c8eb1..7e952de6c08 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata.sh +++ 
b/tests/queries/0_stateless/01825_type_json_ghdata.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-s3-storage, long
+# ^ no-s3-storage: it is memory-hungry

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From e623ad041f4937b0e7ed22f3159acfee6c0147b3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 25 Jul 2024 16:44:17 +0200
Subject: [PATCH 218/363] Make C-z ignorance configurable (ignore_shell_suspend) in clickhouse-client

C-z is extremely useful for some users (like myself), so provide a way to configure it in the client, and avoid ignoring it in clickhouse-disks (I hope this is OK, since it is not that well-known a utility and it does not have its own configuration, while a CLI option would be useless: one would have to remember about it).

Honestly, I've never seen any interactive client that forbids C-z, so ignoring it by default looks strange to me.

Signed-off-by: Azat Khuzhin
---
 programs/client/clickhouse-client.xml | 3 +++
 programs/disks/DisksApp.cpp | 1 +
 programs/disks/DisksApp.h | 2 +-
 programs/disks/DisksClient.cpp | 1 -
 programs/disks/DisksClient.h | 3 +--
 programs/keeper-client/KeeperClient.cpp | 1 +
 src/Client/ClientBase.cpp | 1 +
 src/Client/ReplxxLineReader.cpp | 4 +++-
 src/Client/ReplxxLineReader.h | 1 +
 9 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index 9ce7d1cb223..6eb8976a6ef 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -53,6 +53,9 @@ -->
+
+
+
If
---
 src/Functions/LowerUpperUTF8Impl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index 8469bedde0c..36ee1723269 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -57,7 +57,7 @@ struct LowerUpperUTF8Impl
 input.toUTF8String(output);

 /// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that
- /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
+ /// '\0' is valid. If the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
size_t valid_size = output.size(); if (!output.empty() && output.back() == '\0') From 2f6ad1271cfbd9aa62ad2365e70314aba4da21b9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:27:02 +0200 Subject: [PATCH 316/363] fix tests + exception --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 10 +-- tests/integration/test_storage_hdfs/test.py | 9 +-- .../03203_hive_style_partitioning.reference | 2 - .../03203_hive_style_partitioning.sh | 61 +++---------------- 5 files changed, 15 insertions(+), 69 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ca82a1ce67a..f0d276e4e56 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 637dbd38262..a3172329a99 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( + "Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path ) ] @@ -1560,7 +1560,7 @@ def test_hive_partitioning_with_all_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(Exception, match=pattern): azure_query(node, query, settings={"use_hive_partitioning": 1}) @@ -1572,7 +1572,7 @@ def test_hive_partitioning_without_setting(cluster): table_format = "column1 String, column2 String" values_1 = f"('Elizabeth', 'Gordon')" values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv" azure_query( node, @@ -1582,7 +1582,7 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 
container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ad2e7084791..ea8c4efa745 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1268,11 +1268,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): ) assert r == f"Elizabeth\n" - r = node1.query( - "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" def test_hive_partitioning_with_all_parameters(started_cluster): @@ -1285,11 +1280,11 @@ def test_hive_partitioning_with_all_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 1}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index b5eaef7f51e..af52dcd9b88 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -35,8 +35,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 41b215578f0..4e165446c34 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -11,22 +11,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM 
file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -37,7 +25,6 @@ SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01 $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ @@ -61,21 +48,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" @@ -92,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, 
_column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; """ $CLICKHOUSE_CLIENT -n -q """ @@ -123,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM 
s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; """ From 8a89cd31a1e7770479af6eaf1b4211ef4ece1795 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:29:32 +0200 Subject: [PATCH 317/363] Fix Upgrade Check: move some settings to 24.9 section --- src/Core/SettingsChangesHistory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index fb59577b0f0..5e831c6301c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -72,11 +72,13 @@ static std::initializer_list Date: Thu, 22 Aug 2024 00:48:29 +0200 Subject: [PATCH 318/363] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ea8c4efa745..a75c13b9ea6 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1269,7 +1269,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 92e153585ded4f15e1292613584ff35a55c735f3 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Wed, 21 Aug 2024 19:19:07 -0700 Subject: [PATCH 319/363] Update README.md Add latest meetups from Alexey tour. --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 17b6dcd2ac1..5e66b9da73e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,13 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. +The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov: + * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 +* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

From 9d0b3e3937cca32bc8bc922876fb8e6ac53a3de9 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Thu, 22 Aug 2024 11:32:59 +0800
Subject: [PATCH 320/363] change as request

---
 .../functions/string-replace-functions.md | 145 +++++++++---------
 src/Functions/overlay.cpp | 8 +-
 ...new_functions_must_be_documented.reference | 2 -
 3 files changed, 76 insertions(+), 79 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index 55e97688b18..0cc6b0b27d5 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -8,6 +8,78 @@ sidebar_label: Replacing in Strings
 
 [General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately.
 
+## overlay
+
+Replaces part of the string `input` with another string `replace`, starting at the 1-based index `offset`.
+
+**Syntax**
+
+```sql
+overlay(input, replace, offset[, length])
+```
+
+**Parameters**
+
+- `input`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed.
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value.
+
+**Example**
+
+```sql
+SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res;
+```
+
+Result:
+
+```text
+┌─res─────────────┐
+│ ClickHouse CORE │
+└─────────────────┘
+```
+
+## overlayUTF8
+
+Replaces part of the string `input` with another string `replace`, starting at the 1-based index `offset`.
+
+Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+**Syntax**
+
+```sql
+overlayUTF8(input, replace, offset[, length])
+```
+
+**Parameters**
+
+- `input`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed.
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value.
+
+**Example**
+
+```sql
+SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res;
+```
+
+Result:
+
+```text
+┌─res────────────────────────┐
+│ ClickHouse是开源OLAP数据库 │
+└────────────────────────────┘
+```
+
 ## replaceOne
 
 Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string.
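A note on the `overlay` functions added above: the parameter lists describe a negative `offset` as counting from the end of `input`, but none of the examples show it. A minimal sketch, with the expected output taken from the updated 03205_overlay.reference further below ('Spark SQL' is 9 bytes, so offset -3 lands on the 'S' of 'SQL'):

```sql
SELECT overlay('Spark SQL', '__', -3) AS res;
```

Result:

```text
┌─res───────┐
│ Spark __L │
└───────────┘
```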
@@ -248,76 +320,3 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); │ %Hello World 2024 │ └──────────────────────────────────────────────┘ ``` - -## overlay - -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. - - -**Syntax** - -```sql -overlay(s, replace, offset[, length]) -``` - -**Parameters** - -- `s`: A string type [String](../data-types/string.md). -- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). - -**Returned value** - -- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. - -**Example** - -```sql -SELECT overlay('Spark SQL', 'CORE', 7) AS res; -``` - -Result: - -```text - ┌─res────────┐ - │ Spark CORE │ - └────────────┘ -``` - -## overlayUTF8 - -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. - -Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -**Syntax** - -```sql -overlayUTF8(s, replace, offset[, length]) -``` - -**Parameters** - -- `s`: A string type [String](../data-types/string.md). -- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). - -**Returned value** - -- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. - -**Example** - -```sql -SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; -``` - -Result: - -```text -┌─res────────────────────────┐ -│ ClickHouse是开源OLAP数据库 │ -└────────────────────────────┘ -``` diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 20988c775a5..497ebb9c9cd 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -201,14 +201,14 @@ private: { if (offset > 0) { - if (static_cast(offset) > input_size + 1) [[unlikely]] + if (static_cast(offset) > input_size + 1) return input_size; else return offset - 1; } else { - if (input_size < -static_cast(offset)) [[unlikely]] + if (input_size < -static_cast(offset)) return 0; else return input_size + offset; @@ -704,14 +704,14 @@ REGISTER_FUNCTION(Overlay) { factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `input` equals the length of `replace`. 
If `length` (the optional fourth argument) is specified, a different number of bytes is removed. )", .categories{"String"}}, FunctionFactory::Case::Insensitive); factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. )", diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 6495b6619f9..c39f1fb1ce9 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -512,8 +512,6 @@ nullIf nullIn nullInIgnoreSet or -overlay -overlayUTF8 parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull From 3ff9522b69ec7e51119f445152ffb9678a0f124f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 22 Aug 2024 12:49:10 +0800 Subject: [PATCH 321/363] change as request --- src/Functions/overlay.cpp | 165 +++++++++--------- .../0_stateless/03205_overlay.reference | 58 +++--- tests/queries/0_stateless/03205_overlay.sql | 11 +- 3 files changed, 115 insertions(+), 119 deletions(-) diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 497ebb9c9cd..df8b825eabe 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -53,139 +53,132 @@ public: if (input_rows_count == 0) return ColumnString::create(); - const size_t number_of_arguments = arguments.size(); - bool has_three_args = number_of_arguments == 3; + bool has_four_args = (arguments.size() == 4); - ColumnPtr column_offset = arguments[2].column; - ColumnPtr column_length; - if (!has_three_args) - column_length = arguments[3].column; + ColumnPtr col_input = arguments[0].column; + const auto * col_input_const = checkAndGetColumn(col_input.get()); + const auto * col_input_string = checkAndGetColumn(col_input.get()); + bool input_is_const = (col_input_const != nullptr); - const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); - const ColumnConst * column_length_const = nullptr; - if (!has_three_args) - column_length_const = checkAndGetColumn(column_length.get()); + ColumnPtr col_replace = arguments[1].column; + const auto * col_replace_const = checkAndGetColumn(col_replace.get()); + const auto * col_replace_string = checkAndGetColumn(col_replace.get()); + bool replace_is_const = (col_replace_const != nullptr); + ColumnPtr col_offset = arguments[2].column; + const ColumnConst * col_offset_const = checkAndGetColumn(col_offset.get()); bool offset_is_const = false; - bool length_is_const = false; Int64 offset = -1; - Int64 length = -1; - if (column_offset_const) + if (col_offset_const) { - offset = column_offset_const->getInt(0); + offset = col_offset_const->getInt(0); offset_is_const = true; } - if (column_length_const) + ColumnPtr 
col_length = has_four_args ? arguments[3].column : nullptr; + const ColumnConst * col_length_const = has_four_args ? checkAndGetColumn(col_length.get()) : nullptr; + bool length_is_const = false; + Int64 length = -1; + if (col_length_const) { - length = column_length_const->getInt(0); + length = col_length_const->getInt(0); length_is_const = true; } - auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); - - ColumnPtr column_input = arguments[0].column; - ColumnPtr column_replace = arguments[1].column; - - const auto * column_input_const = checkAndGetColumn(column_input.get()); - const auto * column_input_string = checkAndGetColumn(column_input.get()); - if (column_input_const) + if (col_input_const) { - StringRef input = column_input_const->getDataAt(0); + StringRef input = col_input_const->getDataAt(0); res_data.reserve((input.size + 1) * input_rows_count); } else { - res_data.reserve(column_input_string->getChars().size()); + res_data.reserve(col_input_string->getChars().size()); } - const auto * column_replace_const = checkAndGetColumn(column_replace.get()); - const auto * column_replace_string = checkAndGetColumn(column_replace.get()); - bool input_is_const = (column_input_const != nullptr); - bool replace_is_const = (column_replace_const != nullptr); -#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ +#define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ - constantConstant( \ + constantConstant( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (input_is_const && !replace_is_const) \ - constantVector( \ + constantVector( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (!input_is_const && replace_is_const) \ - vectorConstant( \ + vectorConstant( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else \ - vectorVector( \ + vectorVector( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); - if (has_three_args) + if (!has_four_args) { if (offset_is_const) { - OVERLAY_EXECUTE_CASE(true, true, false) + OVERLAY_EXECUTE_CASE(false, true, false) } else { - OVERLAY_EXECUTE_CASE(true, false, false) + OVERLAY_EXECUTE_CASE(false, false, false) 
} } else { if (offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, true) + OVERLAY_EXECUTE_CASE(true, true, true) } else if (offset_is_const && !length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, false) + OVERLAY_EXECUTE_CASE(true, true, false) } else if (!offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, false, true) + OVERLAY_EXECUTE_CASE(true, false, true) } else { - OVERLAY_EXECUTE_CASE(false, false, false) + OVERLAY_EXECUTE_CASE(true, false, false) } } #undef OVERLAY_EXECUTE_CASE @@ -224,7 +217,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -236,7 +229,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -250,12 +243,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -273,14 +266,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -332,11 +325,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorConstant( size_t rows, const ColumnString::Chars & input_data, @@ -349,7 +343,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorConstant( rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -359,12 +353,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -388,14 +382,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -449,11 +443,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void constantVector( size_t rows, const StringRef & input, @@ -466,7 +461,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantVector( rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -480,7 +475,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -502,7 +497,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -513,7 +508,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -563,11 +558,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorVector( size_t rows, const ColumnString::Chars & input_data, @@ -581,7 +577,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorVector( rows, @@ -600,7 +596,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -629,7 +625,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -640,7 +636,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -693,6 +689,7 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 67a699944e0..4be3baadaea 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -25,34 +25,34 @@ Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH -Test with different offset values --12 _park SQL _park SQL和CH --11 _park SQL S_ark SQL和CH --10 _park SQL Sp_rk SQL和CH --9 _park SQL Spa_k SQL和CH --8 S_ark SQL Spar_ SQL和CH --7 Sp_rk SQL Spark_SQL和CH --6 Spa_k SQL Spark _QL和CH --5 Spar_ SQL Spark S_L和CH --4 Spark_SQL Spark SQ_和CH --3 Spark _QL Spark SQL_CH --2 Spark S_L Spark SQL和_H --1 Spark SQ_ Spark SQL和C_ -0 Spark SQL_ Spark SQL和CH_ -1 _park SQL _park SQL和CH -2 S_ark SQL S_ark SQL和CH -3 Sp_rk SQL Sp_rk SQL和CH -4 Spa_k SQL Spa_k SQL和CH -5 Spar_ SQL Spar_ SQL和CH -6 Spark_SQL Spark_SQL和CH -7 Spark _QL Spark _QL和CH -8 Spark S_L Spark S_L和CH -9 Spark SQ_ Spark SQ_和CH -10 Spark SQL_ Spark SQL_CH -11 Spark SQL_ Spark SQL和_H -12 Spark SQL_ Spark SQL和C_ -13 Spark SQL_ Spark SQL和CH_ -Test with different length values +Test with special offset values +-12 __ark SQL 之park SQL和CH +-11 __ark SQL S之ark SQL和CH +-10 __ark SQL Sp之rk SQL和CH +-9 __ark SQL Spa之k SQL和CH +-8 S__rk SQL Spar之 SQL和CH +-7 Sp__k SQL Spark之SQL和CH +-6 Spa__ SQL Spark 之QL和CH +-5 Spar__SQL Spark S之L和CH +-4 Spark__QL Spark SQ之和CH +-3 Spark __L Spark SQL之CH +-2 Spark S__ Spark SQL和之H +-1 Spark SQ__ Spark SQL和C之 +0 Spark SQL__ Spark SQL和CH之 +1 __ark SQL 之park SQL和CH +2 S__rk SQL S之ark SQL和CH +3 Sp__k SQL Sp之rk SQL和CH +4 Spa__ SQL Spa之k SQL和CH +5 Spar__SQL Spar之 SQL和CH +6 Spark__QL Spark之SQL和CH +7 Spark __L Spark 之QL和CH +8 Spark S__ Spark S之L和CH +9 Spark SQ__ Spark SQ之和CH +10 Spark SQL__ Spark SQL之CH +11 Spark SQL__ Spark SQL和之H +12 Spark SQL__ Spark SQL和C之 +13 Spark SQL__ Spark SQL和CH之 +Test with special length values -1 Spark ANSI Spark ANSI H 0 Spark ANSI SQL Spark ANSI SQL和CH 1 Spark ANSI QL Spark ANSI QL和CH @@ -61,7 +61,7 @@ Test with different length values 4 Spark ANSI Spark ANSI CH 5 Spark ANSI Spark ANSI H 6 Spark ANSI Spark ANSI -Test with different input and replace values +Test with special input and replace values _ _ Spark SQL Spark SQL和CH ANSI ANSI diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index 4d0b5ecbe03..765b29f93ec 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,5 +1,4 @@ SELECT 'Negative test of overlay'; -SELECT overlay('hello', 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -35,13 +34,13 @@ SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0) SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('ANSI '), materialize(7), materialize(0));
 
-SELECT 'Test with different offset values';
-WITH number - 12 as offset SELECT offset, overlay('Spark SQL', '_', offset), overlayUTF8('Spark SQL和CH', '_', offset) from numbers(26);
+SELECT 'Test with special offset values';
+WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26);
 
-SELECT 'Test with different length values';
-WITH number - 1 as length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) from numbers(8);
+SELECT 'Test with special length values';
+WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8);
 
-SELECT 'Test with different input and replace values';
+SELECT 'Test with special input and replace values';
 SELECT overlay('', '_', 6), overlayUTF8('', '_', 6);
 SELECT overlay('Spark SQL', '', 6), overlayUTF8('Spark SQL和CH', '', 6);
 SELECT overlay('', 'ANSI ', 7, 0), overlayUTF8('', 'ANSI ', 7, 0);

From be4439e3ec0a1491f4e333ac848844fd930a6e5b Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 22 Aug 2024 10:30:48 +0300
Subject: [PATCH 322/363] Update install.md

Added correct commands for the Russian version of the installation from deb packages

---
 docs/ru/getting-started/install.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index aee445da843..4a0ec258c64 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -25,10 +25,10 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
 Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu.
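This hunk swaps the deprecated apt-key flow for a dedicated keyring file; once the new commands below have been run, the imported key can be sanity-checked. A minimal sketch, assuming GnuPG 2.1.23 or newer (the check itself is not part of the documented steps):

```bash
# Lists the key(s) stored in the keyring file written by the "gpg --dearmor" step below;
# it should print the ClickHouse repository signing key if the import succeeded.
gpg --show-keys /usr/share/keyrings/clickhouse-keyring.gpg
```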
Для установки пакетов выполните: ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update From 95f45d2eaf39a9e8a6373c75749ec57f727be700 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:20:04 +0200 Subject: [PATCH 323/363] try to fix tests --- .../test_storage_azure_blob_storage/test.py | 14 +++++------ tests/integration/test_storage_hdfs/test.py | 25 +++---------------- .../03203_hive_style_partitioning.reference | 20 +++++++-------- .../03203_hive_style_partitioning.sh | 14 +++-------- 4 files changed, 23 insertions(+), 50 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index a3172329a99..c1f518e45ce 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1513,14 +1513,14 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}')" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1533,7 +1533,7 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1551,14 +1551,14 @@ def test_hive_partitioning_with_all_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, 
" f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = r"DB::Exception: Cannot use hive partitioning for file" @@ -1577,14 +1577,14 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index a75c13b9ea6..31cc8609eb4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,35 +1259,16 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" r = node1.query( - "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" -def test_hive_partitioning_with_all_parameters(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - - pattern = r"DB::Exception: Cannot use hive partitioning for file" - - with pytest.raises(QueryRuntimeException, match=pattern): - node1.query( - f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index af52dcd9b88..acdadc2510b 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,5 +1,5 @@ TESTING THE FILE HIVE PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno 
Elizabeth Guzman Elizabeth @@ -19,8 +19,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -35,12 +34,13 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 +4081 +2070 2070 b 1 -1 TESTING THE URL PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -60,10 +60,9 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth 1 TESTING THE S3 PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -83,8 +82,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -96,7 +94,7 @@ Delgado Elizabeth Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -106,7 +104,7 @@ Gibson Elizabeth Greer Elizabeth Delgado Elizabeth Cross Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 4e165446c34..b3d196924af 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -14,7 +14,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -29,16 +29,10 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ -$CLICKHOUSE_LOCAL -n -q """ -set use_hive_partitioning = 1; - -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; -""" 2>&1 | grep -c "INCORRECT_DATA" - $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -68,7 +62,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM 
s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ $CLICKHOUSE_CLIENT -n -q """ @@ -84,5 +78,5 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ From 84467077b886cd48c9cd33c69c1935b3f7863dd7 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 22 Aug 2024 13:45:13 +0200 Subject: [PATCH 324/363] Fix test for role expiration in RoleCache. --- tests/integration/test_role/test.py | 81 +++++++++-------------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..9d15f0f81db 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -418,72 +418,43 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache can be caused by another role being used. 
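+        # (At this point the cache entry for "rre" is already older than
+        # role_cache_expiration_time_seconds, which is set to 2 in the test configuration - see the sleep above.)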
+ instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") - - -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") - - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) - - instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation - - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") - - assert instance.query("SELECT * from tre1", user="ure") == "0\n" - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") From 664e9b3db9d47e45c642ad21e3a5273ab423199a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:30:41 +0200 Subject: [PATCH 325/363] Add one more test. --- tests/integration/test_role/test.py | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9d15f0f81db..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -458,3 +459,175 @@ def test_role_expiration(with_extra_role): if with_extra_role: instance.query("DROP ROLE extra_role") instance.query("DROP USER extra_user") + + +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 + + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. 
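+    # Access is verified as the union of column grants over all of a user's roles,
+    # whether granted directly or inherited through another role (see check() and create_role() below).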
+ num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) + instance.query( + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) + + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} + + # Checks that each of the users can access the expected set of columns and can't access other columns. + def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok + + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. + def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. 
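+    # The grant topology below is fixed: R1 -> A, R2 -> B, R3 -> R2 (so B also inherits R3), and R3 -> C.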
+ def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. + def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") From 7ef5c366e873c4fd99f257eefbb3a350848e308c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:33:50 +0200 Subject: [PATCH 326/363] Fix expiration in RoleCache. 
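The role-fetching callback passed to collectRoles() captured the old
`subscriptions_on_roles` instead of the freshly built
`new_subscriptions_on_roles`, so subscriptions created while collecting the
enabled roles were attached to the stale container rather than the one being
assembled, which could let cached roles expire without being refreshed.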
---
 src/Access/RoleCache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp
index 2d94df2eea5..cc1f1520b67 100644
--- a/src/Access/RoleCache.cpp
+++ b/src/Access/RoleCache.cpp
@@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO
     SubscriptionsOnRoles new_subscriptions_on_roles;
     new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
 
-    auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); };
+    auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); };
 
     for (const auto & current_role : enabled_roles.params.current_roles)
         collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);

From 54dd3afd49df9c92cd3621a5cec4c7464c341a71 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 22 Aug 2024 14:52:17 +0200
Subject: [PATCH 327/363] Turn off fault injection for insert in 01396_inactive_replica_cleanup_nodes_zookeeper

---
 .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
index bff85b3e29f..9ea15071856 100755
--- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
+++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
@@ -23,11 +23,10 @@ $CLICKHOUSE_CLIENT -n --query "
 DETACH TABLE r2;
 "
 
-$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
+# insert_keeper_fault_injection_probability=0 -- can slow down inserts a lot (produce a lot of parts)
+$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query 
"INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# Now wait for cleanup thread for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From b3f084459f60b1e31c32736573af0810dee99230 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:53:53 +0200 Subject: [PATCH 329/363] fix black --- tests/integration/test_storage_hdfs/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 31cc8609eb4..b18940b7290 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,8 +1259,13 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" + hdfs_api.write_data( + f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/file_1") + == f"column0,column1\nElizabeth,Gordon\n" + ) r = node1.query( "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", @@ -1269,6 +1274,7 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" + def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 8d14d8523098a42cd778ef50a9b066508da8919c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:24:33 +0200 Subject: [PATCH 330/363] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index b18940b7290..7a92e8adb0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1274,7 +1274,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From add4718634317304f652579a9f201c3b81c96a7d Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 06:37:27 -0700 Subject: [PATCH 331/363] Update README.md - Meetups update Fixed one meetup location; Added more meetups --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e66b9da73e..c9474ef0fc0 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,17 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [Raleigh Meetup (Deutsche 
Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9
-* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
+* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
 
+Other upcoming meetups
+* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
+* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
+* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
+* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
+* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
+* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
+
 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
 * **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now!

From 91e65feaaedd4806875aed3d4be4f07edeefdb71 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 22 Aug 2024 13:42:30 +0000
Subject: [PATCH 332/363] fix virtual columns in Merge engine

---
 src/Storages/StorageDistributed.cpp | 2 +-
 src/Storages/StorageMerge.cpp | 14 +++++++-------
 .../02890_describe_table_options.reference | 8 ++++++++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index c4668159759..0b80858800b 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -290,7 +290,7 @@ VirtualColumnsDescription StorageDistributed::createVirtuals()
 
     desc.addEphemeral("_shard_num", std::make_shared<DataTypeUInt32>(), "Deprecated. Use function shardNum instead");
 
-    /// Add virtual columns from table of storage Merges.
+    /// Add virtual columns from table with Merge engine. 
desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e88844e2d31..0827321e296 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. 
Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 From ce33943b430a9ad512f4942083889dea4decb778 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:50:59 +0200 Subject: [PATCH 333/363] Fix flaky check --- tests/docker_scripts/stateless_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker_scripts/stateless_runner.sh b/tests/docker_scripts/stateless_runner.sh index 40a63f74a6b..d8921a04458 100755 --- a/tests/docker_scripts/stateless_runner.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -339,7 +339,7 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
-    NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
+    NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \
         | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
 fi

From a9e793532ae308767da3bc4da74d9631cd85eb90 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 22 Aug 2024 16:34:14 +0200
Subject: [PATCH 334/363] fix shutdown for PeriodicLog

---
 src/Interpreters/PeriodicLog.cpp | 3 ++-
 src/Interpreters/PeriodicLog.h   | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp
index 9d2891e11eb..15970ca5b81 100644
--- a/src/Interpreters/PeriodicLog.cpp
+++ b/src/Interpreters/PeriodicLog.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include "Functions/DateTimeTransforms.h"
 
 namespace DB
 {
@@ -27,7 +28,7 @@ template <typename LogElement>
 void PeriodicLog<LogElement>::shutdown()
 {
     stopCollect();
-    this->stopFlushThread();
+    Base::shutdown();
 }
 
 template <typename LogElement>
diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h
index 08c3f7eb23f..ceac8088d40 100644
--- a/src/Interpreters/PeriodicLog.h
+++ b/src/Interpreters/PeriodicLog.h
@@ -17,6 +17,7 @@ template <typename LogElement>
 class PeriodicLog : public SystemLog<LogElement>
 {
     using SystemLog<LogElement>::SystemLog;
+    using Base = SystemLog<LogElement>;
 
 public:
     using TimePoint = std::chrono::system_clock::time_point;
@@ -24,12 +25,12 @@ public:
     /// Launches a background thread to collect metrics with interval
     void startCollect(size_t collect_interval_milliseconds_);
 
-    /// Stop background thread
-    void stopCollect();
-
     void shutdown() final;
 
 protected:
+    /// Stop background thread
+    void stopCollect();
+
     virtual void stepFunction(TimePoint current_time) = 0;
 
 private:

From 5340ac5fbc7fba75d6a743d345c0f79dc466df0b Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 14:39:19 +0000
Subject: [PATCH 335/363] Update version_date.tsv and changelogs after
 v24.5.5.41-stable

---
 docs/changelogs/v24.5.5.41-stable.md | 71 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +
 2 files changed, 73 insertions(+)
 create mode 100644 docs/changelogs/v24.5.5.41-stable.md

diff --git a/docs/changelogs/v24.5.5.41-stable.md b/docs/changelogs/v24.5.5.41-stable.md
new file mode 100644
index 00000000000..8ba160e31d7
--- /dev/null
+++ b/docs/changelogs/v24.5.5.41-stable.md
@@ -0,0 +1,71 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0)
+
+#### Improvement
+* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. 
Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. 
[#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. 
[#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..9063d3ef971 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +15,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 1ea0163dfe6b3278d8a5e8d86c31b3d63d7a3780 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:42:14 +0200 Subject: [PATCH 336/363] Fix issue with maps with arrays as keys --- src/Functions/FunctionsHashing.h | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 7 ++++++- tests/queries/0_stateless/02534_keyed_siphash.sql | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0cf4246fd66..3da0b2cd9be 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -93,9 +93,9 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - if (offsets != nullptr) + if (offsets != nullptr && i > 0) { - const auto * const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 31c0cae8981..8b147025a05 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -244,5 +244,10 @@ Test emtpy arrays and maps 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +Test maps with arrays as keys 16734549324845627102 -D675BB3D687973A238AB891DD99C7047 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b499d8ef02b..ba3c4a9156d 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -351,5 +351,8 @@ SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); -SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); From a93d1919804d1c8dc7760f20084ade9a09710a47 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:43:38 +0200 Subject: [PATCH 337/363] Fix typo 
in test case --- tests/queries/0_stateless/02534_keyed_siphash.reference | 2 +- tests/queries/0_stateless/02534_keyed_siphash.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 8b147025a05..a05446a494e 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -239,7 +239,7 @@ Check bug found fuzzing Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B -Test emtpy arrays and maps +Test empty arrays and maps 4761183170873013810 0AD04BFD000000000000000000000000 4761183170873013810 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index ba3c4a9156d..7cfc82512bd 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -346,7 +346,7 @@ INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g' SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; -SELECT 'Test emtpy arrays and maps'; +SELECT 'Test empty arrays and maps'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); From 0dc18247df3a290b4fb312325ff3b2a44a3f8357 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:10:24 +0000 Subject: [PATCH 338/363] Update version_date.tsv and changelogs after v24.6.3.38-stable --- docs/changelogs/v24.6.3.38-stable.md | 83 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 + 2 files changed, 86 insertions(+) create mode 100644 docs/changelogs/v24.6.3.38-stable.md diff --git a/docs/changelogs/v24.6.3.38-stable.md b/docs/changelogs/v24.6.3.38-stable.md new file mode 100644 index 00000000000..01d7e26e31f --- /dev/null +++ b/docs/changelogs/v24.6.3.38-stable.md @@ -0,0 +1,83 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c) + +#### Improvement +* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. 
[#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
[#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. 
[#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..cc168f58862 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -3,9 +3,11 @@ v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +16,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 0b9c24f31d548c87deca3334282c14fc78a295ba Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 15 Aug 2024 12:09:50 +0000 Subject: [PATCH 339/363] write metadata to disk and keeper in the same format --- src/Storages/ColumnsDescription.cpp | 30 +++++--- src/Storages/ColumnsDescription.h | 6 +- .../__init__.py | 0 .../config/enable_keeper.xml | 26 +++++++ .../config/users.xml | 8 +++ .../test.py | 71 +++++++++++++++++++ 6 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 0d724245b49..0212bbd6fff 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && ast_to_str(ttl) == 
ast_to_str(other.ttl); } -void ColumnDescription::writeText(WriteBuffer & buf) const +String formatASTStateAware(IAST & ast, IAST::FormatState & state) +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true, false); + ast.formatImpl(settings, state, IAST::FormatStateStacked()); + return buf.str(); +} + +void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const { /// NOTE: Serialization format is insane. @@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeChar('\t', buf); DB::writeText(DB::toString(default_desc.kind), buf); writeChar('\t', buf); - writeEscapedString(queryToString(default_desc.expression), buf); + writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf); } - if (!comment.empty()) + if (!comment.empty() && include_comment) { writeChar('\t', buf); DB::writeText("COMMENT ", buf); - writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf); + auto ast = ASTLiteral(Field(comment)); + writeEscapedString(formatASTStateAware(ast, state), buf); } if (codec) { writeChar('\t', buf); - writeEscapedString(queryToString(codec), buf); + writeEscapedString(formatASTStateAware(*codec, state), buf); } if (!settings.empty()) @@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const ASTSetQuery ast; ast.is_standalone = false; ast.changes = settings; - writeEscapedString(queryToString(ast), buf); + writeEscapedString(formatASTStateAware(ast, state), buf); DB::writeText(")", buf); } if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(statistics.getAST()), buf); + writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf); } if (ttl) { writeChar('\t', buf); DB::writeText("TTL ", buf); - writeEscapedString(queryToString(ttl), buf); + writeEscapedString(formatASTStateAware(*ttl, state), buf); } writeChar('\n', buf); @@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs() } -String ColumnsDescription::toString() const +String ColumnsDescription::toString(bool include_comments) const { WriteBufferFromOwnString buf; + IAST::FormatState ast_format_state; writeCString("columns format version: 1\n", buf); DB::writeText(columns.size(), buf); writeCString(" columns:\n", buf); for (const ColumnDescription & column : columns) - column.writeText(buf); + column.writeText(buf, ast_format_state, include_comments); return buf.str(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f0760160f0a..c89c26501e8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,7 +104,7 @@ struct ColumnDescription bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } - void writeText(WriteBuffer & buf) const; + void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const; void readText(ReadBuffer & buf); }; @@ -137,7 +137,7 @@ public: /// NOTE Must correspond with Nested::flatten function. void flattenNested(); /// TODO: remove, insert already flattened Nested columns. 
- bool operator==(const ColumnsDescription & other) const { return columns == other.columns; } + bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); } bool operator!=(const ColumnsDescription & other) const { return !(*this == other); } auto begin() const { return columns.begin(); } @@ -221,7 +221,7 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - String toString() const; + String toString(bool include_comments = true) const; static ColumnsDescription parse(const String & str); size_t size() const diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml new file mode 100644 index 00000000000..4ca4f604ec3 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml @@ -0,0 +1,26 @@ + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml new file mode 100644 index 00000000000..c5de0b6819c --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml @@ -0,0 +1,8 @@ + + + + default + + + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py new file mode 100644 index 00000000000..e0c15e18c23 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py @@ -0,0 +1,71 @@ +import pytest +import random +import string + +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "config/enable_keeper.xml", + "config/users.xml", + ], + stay_alive=True, + with_minio=True, + macros={"shard": 1, "replica": 1}, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def randomize_table_name(table_name, random_suffix_length=10): + letters = string.ascii_letters + string.digits + return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}" + + +@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"]) +def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine): + """ + Making sure that using aliases in columns' default expressions does not lead to having different columns metadata in ZooKeeper and on disk. 
+ Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150 + """ + + data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}' + + table_name = randomize_table_name("t") + + node.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} + ( + `data` String, + `col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'), + `col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key') + ) + ENGINE = {engine}('/test/{table_name}', '{{replica}}') + ORDER BY col1 + """ + ) + + node.restart_clickhouse() + + node.query( + f""" + INSERT INTO {table_name} (data) VALUES ('{data}'); + """ + ) + assert node.query(f"SELECT data FROM {table_name}").strip() == data + assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val" + assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val" + + node.query(f"DROP TABLE {table_name}") From 859d2bfe273f571458be6f007761bc8c743d589a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 17:18:06 +0200 Subject: [PATCH 340/363] move stopFlushThread to SystemLogBase --- src/Common/SystemLogBase.cpp | 19 +++++++++++++++++++ src/Common/SystemLogBase.h | 2 ++ src/Interpreters/PeriodicLog.cpp | 6 +++--- src/Interpreters/PeriodicLog.h | 2 +- src/Interpreters/SystemLog.cpp | 21 +-------------------- src/Interpreters/SystemLog.h | 7 +------ 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 127c8862a35..45f4eb1c5a6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -273,6 +273,25 @@ void SystemLogBase::startup() saving_thread = std::make_unique([this] { savingThreadFunction(); }); } +template +void SystemLogBase::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + template void SystemLogBase::add(LogElement element) { diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 0d7b04d5c57..0942e920a42 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -216,6 +216,8 @@ public: static consteval bool shouldTurnOffLogger() { return false; } protected: + void stopFlushThread() final; + std::shared_ptr> queue; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 15970ca5b81..1b285aad3ff 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -11,7 +11,7 @@ void PeriodicLog::startCollect(size_t collect_interval_milliseconds_ { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - flush_thread = std::make_unique([this] { threadFunction(); }); + collecting_thread = std::make_unique([this] { threadFunction(); }); } template @@ -20,8 +20,8 @@ void PeriodicLog::stopCollect() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - if (flush_thread) - flush_thread->join(); + if (collecting_thread) + collecting_thread->join(); } template diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h index ceac8088d40..8254a02434a 100644 --- a/src/Interpreters/PeriodicLog.h +++ b/src/Interpreters/PeriodicLog.h @@ -36,7 +36,7 @@ protected: private: void threadFunction(); - std::unique_ptr flush_thread; + std::unique_ptr 
collecting_thread; size_t collect_interval_milliseconds; std::atomic is_shutdown_metric_thread{false}; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 832c39bfaf8..6a3ec197c6e 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -402,32 +402,13 @@ SystemLog::SystemLog( template void SystemLog::shutdown() { - stopFlushThread(); + Base::stopFlushThread(); auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) table->flushAndShutdown(); } -template -void SystemLog::stopFlushThread() -{ - { - std::lock_guard lock(thread_mutex); - - if (!saving_thread || !saving_thread->joinable()) - return; - - if (is_shutdown) - return; - - is_shutdown = true; - queue->shutdown(); - } - - saving_thread->join(); -} - template void SystemLog::savingThreadFunction() diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 9e1af3578bd..31652c1af67 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -125,8 +125,6 @@ public: void shutdown() override; - void stopFlushThread() override; - /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. @@ -136,10 +134,7 @@ public: protected: LoggerPtr log; - using ISystemLog::is_shutdown; - using ISystemLog::saving_thread; - using ISystemLog::thread_mutex; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 51fbc629c6dff4653e687228b0507947516072bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:42:17 +0000 Subject: [PATCH 341/363] Update version_date.tsv and changelogs after v24.7.3.47-stable --- docs/changelogs/v24.7.3.47-stable.md | 55 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++ 2 files changed, 59 insertions(+) create mode 100644 docs/changelogs/v24.7.3.47-stable.md diff --git a/docs/changelogs/v24.7.3.47-stable.md b/docs/changelogs/v24.7.3.47-stable.md new file mode 100644 index 00000000000..e5f23a70fe1 --- /dev/null +++ b/docs/changelogs/v24.7.3.47-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). 
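+
+As an illustrative sketch only (not the reproducer from the PR), the `Not-ready Set` fix above ([#66018](https://github.com/ClickHouse/ClickHouse/pull/66018)) concerns filtering system tables through subqueries, i.e. queries of roughly this shape:
+
+```sql
+-- Hypothetical minimal example of the affected pattern:
+-- a system table filtered by an IN (subquery) condition.
+SELECT name
+FROM system.tables
+WHERE database IN (SELECT name FROM system.databases WHERE name = 'system');
+```
+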
+* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. 
This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). 
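+
+For reference, a sketch of the renamed function from the `JSONMergePatch` entry above — per that entry both spellings resolve to the same implementation, with the lowercase form kept only for compatibility (illustrative values, not taken from the PR's tests):
+
+```sql
+SELECT JSONMergePatch('{"a": 1}', '{"a": 2, "b": 3}');  -- expected: {"a":2,"b":3}
+SELECT jsonMergePatch('{"a": 1}', '{"b": 2}');          -- legacy alias still accepted
+```
+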
+* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). 
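+
+A hedged sketch of the `ALTER MODIFY SQL SECURITY` guard mentioned above — the `INVOKER` modifier and the table shape are assumptions based on the entry, not copied from the PR:
+
+```sql
+CREATE TABLE t_plain (x UInt8) ENGINE = MergeTree ORDER BY x;
+-- SQL SECURITY only applies to (materialized) views, so on a plain table
+-- this should now fail with an explicit error instead of being accepted:
+ALTER TABLE t_plain MODIFY SQL SECURITY INVOKER;
+```
+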
+* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..6ef5ace4ba6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,14 @@ v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +17,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:46:47 +0200 Subject: [PATCH 342/363] small fixes --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 4a0ec258c64..5bce41ec07a 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -22,7 +22,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su ### Из deb-пакетов {#install-from-deb-packages} -Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: +Рекомендуется использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash sudo apt-get install -y apt-transport-https ca-certificates curl gnupg @@ -55,7 +55,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ::: ### Из rpm-пакетов {#from-rpm-packages} -Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. +Команда ClickHouse рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. #### Установка официального репозитория @@ -102,7 +102,7 @@ sudo yum install clickhouse-server clickhouse-client ### Из tgz-архивов {#from-tgz-archives} -Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. +Команда ClickHouse рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. 
Пример установки самой свежей версии: From 980b02bfd67defbbdf78165e8225fb754d722d7a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:48:57 +0200 Subject: [PATCH 343/363] fix compatibility with en version --- docs/ru/getting-started/install.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 5bce41ec07a..f8a660fbec9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -31,9 +31,17 @@ curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | s echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update +``` +#### Установка ClickHouse server и client + +```bash sudo apt-get install -y clickhouse-server clickhouse-client +``` +#### Запуск ClickHouse server + +```bash sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` From 7c3a013d56c1dbd5b72f04f6be61f007004aaefa Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 22 Aug 2024 16:53:30 +0100 Subject: [PATCH 344/363] Update newjson.md --- docs/en/sql-reference/data-types/newjson.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 9e43216df6c..f7fc7e1498e 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -70,7 +70,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json └────────────────────────────────────────────────┘ ``` -CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. +CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. 
## Reading JSON paths as subcolumns From 28fbd8a4eff4eafa7db99eb37e38376ffda11763 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:56:16 +0200 Subject: [PATCH 345/363] fix stateless tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 2 -- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index acdadc2510b..a9d856babce 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -34,8 +34,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index b3d196924af..6734c5f14ad 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From 9c0e1df1663dd5c56066dd615fc3cafe6408d308 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:58:15 +0200 Subject: [PATCH 346/363] Fix flaky test 00989_parallel_parts_loading --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..dc074241ff6 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small insert block size can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From 0bd8ebf62616ce882b0ebc46945c837a5a91ba44 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 17:58:56 +0200 Subject: [PATCH 347/363] Update README.md adding community call. resolving recent recordings --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9474ef0fc0..9099fd48659 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. 
-* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 ## Upcoming Events @@ -58,7 +58,7 @@ Other upcoming meetups ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! +* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? From 52cdd88eb6d7bbb5d395dd80445655ad47c83c92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:59:10 +0200 Subject: [PATCH 348/363] Better comment --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index dc074241ff6..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,5 @@ -- Tags: no-random-settings, no-random-merge-tree-settings --- small insert block size can make insert terribly slow, especially with some build like msan +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From e7b89537bf1bb760c6082f04de4668bd1c00f33a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 18:02:42 +0200 Subject: [PATCH 349/363] fix style --- src/Interpreters/PeriodicLog.cpp | 1 - src/Interpreters/SystemLog.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 1b285aad3ff..22bc14856c4 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Functions/DateTimeTransforms.h" namespace DB { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 31652c1af67..c03f9370068 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -134,7 +134,7 @@ public: protected: LoggerPtr log; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 1692360233593e635c5a7797847bdfd8a0ffa33e Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 18:12:38 +0200 Subject: [PATCH 350/363] Update README.md 26 and 266 are different --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9099fd48659..83a5c05c667 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to 
discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26 ## Upcoming Events From 4264fbc037accedecebcd8122910e4406e92cd58 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 16:16:47 +0000 Subject: [PATCH 351/363] Update version_date.tsv and changelogs after v24.8.2.3-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.8.2.3-lts.md | 12 ++++++++++++ utils/list-versions/version_date.tsv | 5 +++++ 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.8.2.3-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index fc93cee5bbc..6ff7ea43374 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3ceaf2a08b4..c87885d3b49 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 76db997821c..6ccf74823e2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.8.2.3-lts.md b/docs/changelogs/v24.8.2.3-lts.md new file mode 100644 index 00000000000..69dfc9961a2 --- /dev/null +++ b/docs/changelogs/v24.8.2.3-lts.md @@ -0,0 +1,12 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..199c4f822f4 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,15 @@ +v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +18,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From fa453c3664b18da7a6945e662b881f80fedadc5b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 18:13:45 +0200 Subject: [PATCH 352/363] Disable SqlLogic job --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 58de25f039f..0885f1d9ec2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -498,9 +498,10 @@ class CI: JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], ), - JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], - ), + # TODO: job does not work at all, uncomment and fix + # JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( + # required_builds=[BuildNames.PACKAGE_RELEASE], + # ), JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1cdb3f1487e..9d95a19790f 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -204,7 +204,7 @@ class JobNames(metaclass=WithIter): PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)" PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)" - SQL_LOGIC_TEST = "Sqllogic test (release)" + # SQL_LOGIC_TEST = "Sqllogic test (release)" SQLANCER = "SQLancer (release)" SQLANCER_DEBUG = "SQLancer (debug)" From 06c46ee75bcb94fe02ac68df6a4a044145804d76 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 18:56:50 +0200 Subject: [PATCH 353/363] add one more test --- .../0_stateless/03203_hive_style_partitioning.reference | 1 + tests/queries/0_stateless/03203_hive_style_partitioning.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index a9d856babce..0fbc1fb556e 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -37,6 +37,7 @@ Array(Int64) LowCardinality(Float64) 2070 b 1 +1 TESTING THE URL PARTITIONING last Elizabeth Frank Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 6734c5f14ad..8ab18f5edfe 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ 
b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "INCORRECT_DATA" + $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; From f89193fa416cc333f549d72bb8ba453907edc951 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 19:12:19 +0000 Subject: [PATCH 354/363] Update version_date.tsv and changelogs after v24.5.5.41-stable --- utils/list-versions/version_date.tsv | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 199c4f822f4..0e25f8d3b62 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,9 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 -v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 -v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 From 4200b3d5cbbfe065073c40f1e122c44189f3554f Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 14:02:25 +0200 Subject: [PATCH 355/363] CI: Stress test fix --- tests/clickhouse-test | 2 +- tests/docker_scripts/stress_runner.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4f9380d6f20..ad6173065fe 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3567,7 +3567,7 @@ if __name__ == "__main__": f"Cannot access the specified directory with queries ({args.queries})", file=sys.stderr, ) - sys.exit(1) + assert False, "No --queries provided" CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace diff --git a/tests/docker_scripts/stress_runner.sh b/tests/docker_scripts/stress_runner.sh index 7666398e10b..039c60c8e4e 100755 --- a/tests/docker_scripts/stress_runner.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -10,8 +10,7 @@ dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -ln -s /repo/tests/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /repo/tests/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib @@ -266,6 +265,7 @@ fi start_server +cd /repo/tests/ || exit 1 # clickhouse-test can find queries dir from there python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ || echo -e "Test script failed$FAIL script exit code: $?" 
>> /test_output/test_results.tsv From dc862b1411884a462bba8dcf86a474ccbe57e380 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 23:40:18 +0200 Subject: [PATCH 356/363] fix test --- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 8ab18f5edfe..60e8a6e9faa 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "INCORRECT_DATA" $CLICKHOUSE_LOCAL -n -q """ From 4c790999eb6ad74e3a8f99c072dcc12c956a63d8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 23 Aug 2024 02:18:26 +0200 Subject: [PATCH 357/363] CI: Force package_debug build on release branches --- .github/workflows/release_branches.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 82826794ea3..ec119b6ff95 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -130,6 +130,7 @@ jobs: with: build_name: package_debug data: ${{ needs.RunConfig.outputs.data }} + force: true BuilderBinDarwin: needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} From f5739dfe06db8610818fafb5c3a2c33f59fd0a8d Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 23 Aug 2024 02:51:27 +0200 Subject: [PATCH 358/363] CI: Make job rerun possible if triggered manually --- tests/ci/ci.py | 7 +++++-- tests/ci/ci_utils.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index a9ae078b449..d201b6602f5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -333,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info): CI.JobNames.BUILD_CHECK, ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name)) - if rerun_helper.is_already_finished_by_status(): + if ( + rerun_helper.is_already_finished_by_status() + and not Utils.is_job_triggered_manually() + ): print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status @@ -344,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info): skip_status = status.state # ci cache check - if not to_be_skipped and not no_cache: + if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually(): ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() job_config = CI.get_job_config(job_name) if ci_cache.is_successful( diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index a4c0977f47c..e8d9e7dc254 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -18,6 +18,7 @@ class Envs: ) S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") + GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "") class WithIter(type): @@ -282,3 +283,7 @@ class Utils: ): res = 
res.replace(*r) return res + + @staticmethod + def is_job_triggered_manually(): + return "robot" not in Envs.GITHUB_ACTOR From 60e4bcbbf0b1991b42bcab4b83e55be344e8a659 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 20:45:28 -0700 Subject: [PATCH 359/363] Update README.md Update Raleigh meetup link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 83a5c05c667..546f08afd3d 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 -* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9 * [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 From e5380806653f8d391c6e88664b0096c3c51240f5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:09:03 +0000 Subject: [PATCH 360/363] Update version_date.tsv and changelogs after v24.5.6.45-stable --- docs/changelogs/v24.5.6.45-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.5.6.45-stable.md diff --git a/docs/changelogs/v24.5.6.45-stable.md b/docs/changelogs/v24.5.6.45-stable.md new file mode 100644 index 00000000000..b329ebab27b --- /dev/null +++ b/docs/changelogs/v24.5.6.45-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME as compared to v24.5.5.78-stable (0138248cb62) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). 
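+
+A minimal sketch of the variant-subcolumn read covered by the entry above (the table shape is assumed for illustration; the experimental setting name is as of 24.x):
+
+```sql
+SET allow_experimental_variant_type = 1;
+CREATE TABLE t_variant (v Variant(UInt64, String)) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_variant VALUES (1), ('Hello');
+SELECT v.UInt64, v.String FROM t_variant;  -- per-type subcolumn reads
+```
+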
+* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. 
[#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0e25f8d3b62..57a59d7ac49 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,8 +6,8 @@ v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 +v24.5.6.45-stable 2024-08-23 v24.5.5.78-stable 2024-08-05 -v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:37:32 +0000 Subject: [PATCH 361/363] Update version_date.tsv and changelogs after v24.6.4.42-stable --- docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.6.4.42-stable.md diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md new file mode 100644 index 00000000000..29b6ba095af --- /dev/null +++ b/docs/changelogs/v24.6.4.42-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 07:37:32 +0000
Subject: [PATCH 361/363] Update version_date.tsv and changelogs after v24.6.4.42-stable

---
 docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelogs/v24.6.4.42-stable.md

diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md
new file mode 100644
index 00000000000..29b6ba095af
--- /dev/null
+++ b/docs/changelogs/v24.6.4.42-stable.md
@@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
+* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
+* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 0e25f8d3b62..8ce510f110d 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,11 +3,12 @@ v24.8.1.2684-lts 2024-08-21
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
+v24.5.6.45-stable 2024-08-23
 v24.5.5.78-stable 2024-08-05
-v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13
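
The `::` parsing fix ([#67433](https://github.com/ClickHouse/ClickHouse/pull/67433)), backported in both changelogs above, concerns the archive syntax of the `file` table function. A hedged sketch with hypothetical file names — `exports::2024.csv` stands for any plain file whose name happens to contain `::`:

```sql
-- '::' after an archive name addresses a file inside the archive:
SELECT * FROM file('backups/data.zip :: events.csv', 'CSV');
-- A name that merely contains '::' but is not an archive is now parsed as a
-- single plain file name instead of being misread as archive syntax:
SELECT * FROM file('exports::2024.csv', 'CSV');
```
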
From eec720dab60ea63b033919bbc4c1f6837920a42d Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 08:05:27 +0000
Subject: [PATCH 362/363] Update version_date.tsv and changelogs after v24.7.4.51-stable

---
 docs/changelogs/v24.7.4.51-stable.md | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/changelogs/v24.7.4.51-stable.md

diff --git a/docs/changelogs/v24.7.4.51-stable.md b/docs/changelogs/v24.7.4.51-stable.md
new file mode 100644
index 00000000000..a7cf9790383
--- /dev/null
+++ b/docs/changelogs/v24.7.4.51-stable.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
+* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
+* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
+* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 57a59d7ac49..d9674ed2366 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,8 +1,10 @@
 v24.8.2.3-lts 2024-08-22
 v24.8.1.2684-lts 2024-08-21
+v24.7.4.51-stable 2024-08-23
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01

From b0894bffe62722acee2fa5d832ceda9a75754bde Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:01:17 +0200
Subject: [PATCH 363/363] change test file location

---
 .../sample.parquet | Bin
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/{column0=Elizabeth => column0=Elizabeth1}/sample.parquet (100%)

diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
similarity index 100%
rename from tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet
rename to tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
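
The final commit only moves a test fixture, but the path layout shows what the data exercises: Hive-style partitioning, where `key=value` directory segments such as `column0=Elizabeth` surface as virtual columns. A sketch of the typical query shape — the `use_hive_partitioning` setting and the glob pattern are assumptions inferred from the test path, not part of this patch:

```sql
-- With Hive-style partitioning enabled, 'column0=Elizabeth' in the path
-- is exposed as a virtual column named column0 with value 'Elizabeth'.
SET use_hive_partitioning = 1;
SELECT column0, count()
FROM file('data_hive/partitioning/column0=*/**.parquet', 'Parquet')
GROUP BY column0;
```
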