From 3cfb921befa895e445e8d7b98e639015e1e41aa0 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 18:41:53 +0000 Subject: [PATCH 001/114] Fix using schema_inference_make_columns_nullable=0 --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 4 +++ src/Processors/Formats/ISchemaReader.cpp | 2 +- .../Formats/Impl/ArrowBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 9 ++++--- .../Formats/Impl/ArrowColumnToCHColumn.h | 3 ++- .../Impl/NativeORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ParquetBlockInputFormat.cpp | 7 +++-- .../03036_parquet_arrow_nullable.reference | 26 +++++++++++++++++++ .../03036_parquet_arrow_nullable.sh | 7 +++++ 13 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0808e8eb49f..bc9c6daab1b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1116,7 +1116,7 @@ class IColumn; M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ - M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ + M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. 
When set to 'auto', ClickHosue will use information about nullability from the data..", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a78836ff63c..8d8257b9abc 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -255,7 +255,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.schema_inference_hints = settings.schema_inference_hints; - format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; + format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2); format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f0359218775..479b1a89adf 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -75,7 +75,7 @@ struct FormatSettings Raw }; - bool schema_inference_make_columns_nullable = true; + UInt64 schema_inference_make_columns_nullable = true; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 3c374ada9e6..c04682e8765 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1262,7 +1262,11 @@ namespace if (checkCharCaseInsensitive('n', buf)) { if (checkStringCaseInsensitive("ull", buf)) + { + if (settings.schema_inference_make_columns_nullable == 0) + return std::make_shared(); return makeNullable(std::make_shared()); + } else if (checkStringCaseInsensitive("an", buf)) return std::make_shared(); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 45523700a5d..569d4bb39e7 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -54,7 +54,7 @@ void checkFinalInferredType( type = default_type; } - if (settings.schema_inference_make_columns_nullable) + if (settings.schema_inference_make_columns_nullable == 1) type = makeNullableRecursively(type); /// In case when data for some column could contain nulls and regular values, /// resulting inferred type is Nullable. diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 72a93002669..cf079e52db0 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema() schema = file_reader->schema(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, stream ? 
"ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + stream ? "ArrowStream" : "Arrow", + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ed91913de4d..bcc8bfecdc6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -712,6 +712,7 @@ struct ReadColumnFromArrowColumnSettings FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; bool allow_arrow_null_type; bool skip_columns_with_unsupported_types; + bool allow_inferring_nullable_columns; }; static ColumnWithTypeAndName readColumnFromArrowColumn( @@ -1085,7 +1086,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( bool is_map_nested_column, const ReadColumnFromArrowColumnSettings & settings) { - bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); + bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns; if (read_as_nullable_column && arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::LARGE_LIST && @@ -1149,14 +1150,16 @@ static std::shared_ptr createArrowColumn(const std::shared_ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types) + bool skip_columns_with_unsupported_types, + bool allow_inferring_nullable_columns) { ReadColumnFromArrowColumnSettings settings { .format_name = format_name, .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, .allow_arrow_null_type = false, - .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types, + .allow_inferring_nullable_columns = allow_inferring_nullable_columns, }; ColumnsWithTypeAndName sample_columns; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 27e9afdf763..8521cd2f410 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -34,7 +34,8 @@ public: static Block arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types = false); + bool skip_columns_with_unsupported_types = false, + bool allow_inferring_nullable_columns = true); struct DictionaryInfo { diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 58bec8120f1..b0fd6789d1a 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema() header.insert(ColumnWithTypeAndName{type, name}); } - if (format_settings.schema_inference_make_columns_nullable) + if 
(format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a3c218fa26e..2266c0b488c 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema() { initializeIfNeeded(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "ORC", + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index bc5e8292192..b116070b8df 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -810,8 +810,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema() THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "Parquet", + format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference index 985f8192f26..d15f0d8365d 100644 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -1,40 +1,66 @@ Parquet a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Arrow a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Parquet b Array(UInt64) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Arrow b Array(Nullable(UInt64)) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Parquet c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Arrow c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Parquet d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Arrow d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) 
+d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Parquet e Map(UInt64, String) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Arrow e Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Parquet f Map(UInt64, Map(UInt64, String)) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Arrow f Map(UInt64, Map(UInt64, Nullable(String))) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Parquet g String g_nullable Nullable(String) +g String +g_nullable String Arrow g LowCardinality(String) g_nullable LowCardinality(String) +g LowCardinality(String) +g_nullable LowCardinality(String) diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh index bdd641e2b94..379756f78f3 100755 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh @@ -14,6 +14,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -21,6 +22,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -28,6 +30,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -35,6 +38,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -42,6 +46,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -49,6 +54,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, 
Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -56,6 +62,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done From 70708fd5dcf633d4d3147240195554587f4fb14f Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 19:19:02 +0000 Subject: [PATCH 002/114] Update docs, make better --- docs/en/interfaces/schema-inference.md | 7 ++----- docs/en/operations/settings/settings-formats.md | 4 ++-- src/Processors/Formats/ISchemaReader.cpp | 5 ----- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..5b3cd179e21 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1385,7 +1385,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul #### schema_inference_make_columns_nullable Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Enabled by default. 
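[Review note] The `valueOr(2)` mapping in patch 001's `FormatFactory.cpp` hunk collapses the new `UInt64Auto` setting into three plain integers. Below is a minimal sketch of how the rest of the series interprets them; the enum and helper names are hypothetical stand-ins, and only the 0/1/2 values and the `== 1` / `!= 0` comparisons are taken from the hunks themselves:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical condensation of the tri-state setting: FormatFactory.cpp maps
// the UInt64Auto value with valueOr(2), so 0 = never Nullable,
// 1 = always Nullable, 2 = 'auto' (trust nullability info in data/metadata).
enum class MakeNullable : uint64_t { Never = 0, Always = 1, Auto = 2 };

// What the Arrow/ORC/Parquet schema readers pass down as
// allow_inferring_nullable_columns: the `!= 0` checks in the reader hunks.
bool allowInferringNullableColumns(MakeNullable mode)
{
    return mode != MakeNullable::Never;
}

// Only the explicit value 1 recursively wraps every inferred type in
// Nullable: the `== 1` checks in ISchemaReader.cpp and the schema readers.
bool forceNullableRecursively(MakeNullable mode)
{
    return mode == MakeNullable::Always;
}

int main()
{
    assert(allowInferringNullableColumns(MakeNullable::Auto));
    assert(!forceNullableRecursively(MakeNullable::Auto)); // 'auto' trusts metadata
    assert(!allowInferringNullableColumns(MakeNullable::Never));
}
```

The asymmetry between `== 1` and `!= 0` is the heart of the fix: under `auto` (2), a column becomes `Nullable` only when the file's data or metadata says it can hold NULLs, which is exactly the behavior the documentation paragraph above describes.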
@@ -1408,15 +1408,13 @@ DESC format(JSONEachRow, $$ └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql -SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 0; +SET schema_inference_make_columns_nullable = 'auto'; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} $$) ``` ```response - ┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ id │ Int64 │ │ │ │ │ │ │ age │ Int64 │ │ │ │ │ │ @@ -1428,7 +1426,6 @@ DESC format(JSONEachRow, $$ ```sql SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 1; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8b40cd81ac..57812ef0e03 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} -Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. +Controls making inferred types `Nullable` in schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Default value: `true`. diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 569d4bb39e7..e002e64b7e5 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -56,11 +56,6 @@ void checkFinalInferredType( if (settings.schema_inference_make_columns_nullable == 1) type = makeNullableRecursively(type); - /// In case when data for some column could contain nulls and regular values, - /// resulting inferred type is Nullable. - /// If input_format_null_as_default is enabled, we should remove Nullable type. 
- else if (settings.null_as_default) - type = removeNullable(type); } void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type) From e2feaefcaf0e88f86f303c068edcbdacaeb67252 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:13:49 +0200 Subject: [PATCH 003/114] Update src/Core/Settings.h Co-authored-by: Alexey Katsman --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bc9c6daab1b..2417ddd39e8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1116,7 +1116,7 @@ class IColumn; M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ - M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHosue will use information about nullability from the data..", 0) \ + M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. 
When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ From ccb7ecb9a22ddeabe93a5b907e3ad688b04966b4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:13:57 +0200 Subject: [PATCH 004/114] Update src/Formats/FormatSettings.h Co-authored-by: Alexey Katsman --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 479b1a89adf..81b34ff0c55 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -75,7 +75,7 @@ struct FormatSettings Raw }; - UInt64 schema_inference_make_columns_nullable = true; + UInt64 schema_inference_make_columns_nullable = 1; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; From 92a9b29b45c254e670fe9f67114b5af890bfb5cb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 15 Aug 2024 22:25:21 +0800 Subject: [PATCH 005/114] devirtualize format reader --- .../Formats/Impl/BinaryRowInputFormat.cpp | 4 +- .../Formats/Impl/BinaryRowInputFormat.h | 7 ++- .../Formats/Impl/CSVRowInputFormat.cpp | 2 +- .../Formats/Impl/CSVRowInputFormat.h | 7 ++- .../Impl/CustomSeparatedRowInputFormat.h | 3 +- .../Impl/JSONCompactEachRowRowInputFormat.h | 4 +- .../Impl/JSONCompactRowInputFormat.cpp | 2 +- .../Formats/Impl/JSONCompactRowInputFormat.h | 4 +- .../Formats/Impl/TabSeparatedRowInputFormat.h | 4 +- .../RowInputFormatWithNamesAndTypes.cpp | 61 +++++++++++++------ .../Formats/RowInputFormatWithNamesAndTypes.h | 5 +- 11 files changed, 67 insertions(+), 36 deletions(-) diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index c5336f3bcc7..b549f2de975 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -15,8 +15,8 @@ namespace ErrorCodes } template -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) - : RowInputFormatWithNamesAndTypes( +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes>( header, in_, params_, diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 6f2042d1315..6a4ca8f6418 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -10,13 +10,16 @@ namespace DB class ReadBuffer; +template +class BinaryFormatReader; + /** A stream for inputting data in a binary line-by-line format. 
*/ template -class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes +class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes> { public: - BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); + BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowInputFormat"; } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index b7f84748f61..cf58a4057c8 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -61,7 +61,7 @@ CSVRowInputFormat::CSVRowInputFormat( bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_) + std::unique_ptr format_reader_) : RowInputFormatWithNamesAndTypes( header_, *in_, diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index fe4d4e3be08..86af5028438 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -13,10 +12,12 @@ namespace DB { +class CSVFormatReader; + /** A stream for inputting data in csv format. * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values. */ -class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes +class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes { public: /** with_names - in the first line the header with column names @@ -32,7 +33,7 @@ public: protected: CSVRowInputFormat(const Block & header_, std::shared_ptr in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); CSVRowInputFormat(const Block & header_, std::shared_ptr in_buf_, const Params & params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 58f78e5af42..b1d35947ba8 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -9,7 +9,8 @@ namespace DB { -class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes +class CustomSeparatedFormatReader; +class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: CustomSeparatedRowInputFormat( diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index ebeb939e7fa..50589329073 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -11,7 +11,7 @@ namespace DB { class ReadBuffer; - +class JSONCompactEachRowFormatReader; /** A stream for reading data in a bunch of formats: * - JSONCompactEachRow @@ -20,7 +20,7 @@ class ReadBuffer; * - JSONCompactStringsEachRowWithNamesAndTypes * */ -class JSONCompactEachRowRowInputFormat final : 
public RowInputFormatWithNamesAndTypes +class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: JSONCompactEachRowRowInputFormat( diff --git a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp index 63066fc8220..63ced05dd3a 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes JSONCompactRowInputFormat::JSONCompactRowInputFormat( const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) - : RowInputFormatWithNamesAndTypes( + : RowInputFormatWithNamesAndTypes( header_, in_, params_, false, false, false, format_settings_, std::make_unique(in_, format_settings_)) { } diff --git a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h index 3a93e7149b0..eb70f6ec2a3 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h @@ -5,8 +5,8 @@ namespace DB { - -class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes +class JSONCompactFormatReader; +class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: JSONCompactRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 32abd532a52..3c6efe9ac4c 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -10,9 +10,11 @@ namespace DB { +class TabSeparatedFormatReader; + /** A stream to input data in tsv format. 
*/ -class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes +class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: /** with_names - the first line is the header with the names of the columns diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index ae30d741c2f..5701b80ecc2 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -1,14 +1,20 @@ -#include -#include -#include -#include #include #include -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -44,7 +50,8 @@ namespace } } -RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( +template +RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( const Block & header_, ReadBuffer & in_, const Params & params_, @@ -52,7 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_, + std::unique_ptr format_reader_, bool try_detect_header_) : RowInputFormatWithDiagnosticInfo(header_, in_, params_) , format_settings(format_settings_) @@ -66,7 +73,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } -void RowInputFormatWithNamesAndTypes::readPrefix() +template +void RowInputFormatWithNamesAndTypes::readPrefix() { /// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*). 
/// Also, we assume that column name or type cannot contain BOM, so, if format has header, @@ -138,7 +146,8 @@ void RowInputFormatWithNamesAndTypes::readPrefix() } } -void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) +template +void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) { auto & read_buf = getReadBuffer(); PeekableReadBuffer * peekable_buf = dynamic_cast(&read_buf); @@ -201,7 +210,8 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & colu peekable_buf->dropCheckpoint(); } -bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) +template +bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) { if (unlikely(end_of_stream)) return false; @@ -280,7 +290,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE return true; } -size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) +template +size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) { if (unlikely(end_of_stream)) return 0; @@ -304,7 +315,8 @@ size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) return num_rows; } -void RowInputFormatWithNamesAndTypes::resetParser() +template +void RowInputFormatWithNamesAndTypes::resetParser() { RowInputFormatWithDiagnosticInfo::resetParser(); column_mapping->column_indexes_for_input_fields.clear(); @@ -313,7 +325,8 @@ void RowInputFormatWithNamesAndTypes::resetParser() end_of_stream = false; } -void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +template +void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) { const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; if (index) @@ -328,7 +341,8 @@ void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & ty } } -bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +template +bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) { if (in->eof()) { @@ -374,12 +388,14 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu return format_reader->parseRowEndWithDiagnosticInfo(out); } -bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos) +template +bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos) { return format_reader->isGarbageAfterField(index, pos); } -void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) +template +void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) { format_reader->setReadBuffer(in_); IInputFormat::setReadBuffer(in_); @@ -582,5 +598,12 @@ void FormatWithNamesAndTypesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr transformInferredTypesIfNeeded(type, new_type, format_settings); } +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes>; +template class RowInputFormatWithNamesAndTypes>; } diff --git 
a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b7d9507151e..cd836cb00dc 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -26,6 +26,7 @@ class FormatWithNamesAndTypesReader; /// will be compared types from header. /// It's important that firstly this class reads/skips names and only /// then reads/skips types. So you can this invariant. +template class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo { protected: @@ -41,7 +42,7 @@ protected: bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_, + std::unique_ptr format_reader_, bool try_detect_header_ = false); void resetParser() override; @@ -70,7 +71,7 @@ private: bool is_header_detected = false; protected: - std::unique_ptr format_reader; + std::unique_ptr format_reader; Block::NameMap column_indexes_by_names; }; From 370b6bdc7b6d97f0e697e99ccd06a25e97651406 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 13:38:30 +0000 Subject: [PATCH 006/114] Update tests --- tests/queries/0_stateless/02497_schema_inference_nulls.sql | 4 ++-- .../0_stateless/02784_schema_inference_null_as_default.sql | 4 ++-- ..._max_bytes_to_read_for_schema_inference_in_cache.reference | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index b78b5709dbb..5670b031e8b 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -40,7 +40,7 @@ desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]'); desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONCompactEachRow, '[[1, 2]]'); desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); @@ -59,7 +59,7 @@ desc format(CSV, '"[[], [null], [1, 2, 3]]"'); desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(CSV, '"[1,2]"'); desc format(CSV, '"[NULL, 1]"'); desc format(CSV, '"[1, 2]"\n"[3]"'); diff --git a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql index 9c9f99d8283..571e3ab4f25 100644 --- a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql +++ b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql @@ -1,7 +1,7 @@ desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; -desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; -select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; +desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings 
schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; +select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference index cd109daac52..3b9d88edc19 100644 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -1,2 +1,2 @@ x Nullable(Int64) -schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=1, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false From 5fe46af4221a36ecb4566ca7bfad314d732f1de2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:12:51 +0200 Subject: [PATCH 007/114] Update 02497_schema_inference_nulls.sql --- tests/queries/0_stateless/02497_schema_inference_nulls.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index 5670b031e8b..d62fc76d9b9 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -18,7 +18,7 @@ desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}'); desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONEachRow, '{"x" : [1, 2]}'); desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); From 00a27669df3f2153401d62b4d1681914bd0f440a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 20:22:14 +0200 Subject: [PATCH 008/114] Fix builds --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 1 + 
1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 69b0c095ad1..6263351897e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1258,6 +1258,7 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam .date_time_overflow_behavior = date_time_overflow_behavior, .allow_arrow_null_type = true, .skip_columns_with_unsupported_types = false + .allow_inferring_nullable_columns = true; }; Columns columns; From 0ccbb554b9d0b7055415569559e029060261243e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:58:14 +0200 Subject: [PATCH 009/114] Update 02995_index_7.sh --- tests/queries/0_stateless/02995_index_7.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh index a5fdd98b2f8..7a03b0d4c1a 100755 --- a/tests/queries/0_stateless/02995_index_7.sh +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 24eeaffa7a3ddbfa0fb7bc4546942bc18cab06af Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:02:09 +0200 Subject: [PATCH 010/114] init --- src/Storages/VirtualColumnUtils.cpp | 11 +++++----- .../03203_hive_style_partitioning.reference | 1 + .../03203_hive_style_partitioning.sh | 19 +++++++++--------- .../partitioning/a=b/a=b/sample.parquet | Bin 0 -> 1308 bytes 4 files changed, 17 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index d932f5cc469..edf50907752 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -136,14 +136,15 @@ std::unordered_map parseHivePartitioningKeysAndValues( std::unordered_map key_values; std::string key, value; - std::unordered_set used_keys; + std::unordered_map used_keys; while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) { - if (used_keys.contains(key)) - throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key); - used_keys.insert(key); + auto it = used_keys.find(key); + if (it != used_keys.end() && it->second != value) + throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key); + used_keys.insert({key, value}); - auto col_name = "_" + key; + auto col_name = key; while (storage_columns.has(col_name)) col_name = "_" + col_name; key_values[col_name] = value; diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index a4a2e48e046..12ffd17c102 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ 
b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -39,6 +39,7 @@ Array(Int64) LowCardinality(Float64) 2070 1 1 +b TESTING THE URL PARTITIONING first last Elizabeth Jorge Frank Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index db1f073d736..5a0bd482985 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -27,26 +27,27 @@ SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/c SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; -SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; -SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; -SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; -SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; +SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; +SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT toTypeName(array), toTypeName(float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE number = 42; """ $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; -SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "INCORRECT_DATA" $CLICKHOUSE_LOCAL -n -q """ @@ -78,7 +79,7 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/colum SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND 
column1 = _column1; -SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" +SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; @@ -109,7 +110,7 @@ SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; """ diff --git a/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 From e416a2b3d2f9ff2395a218e79f9417cb96dafbda Mon Sep 17 00:00:00 2001 From: leonkozlowski Date: Tue, 20 Aug 2024 09:42:19 -0400 Subject: [PATCH 011/114] patch: fix reference to sorting key in primary key docs --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 183b94f4641..0b693775dde 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key. 
-It is usually not necessary to specify the primary key in addition to the primary key. +It is usually not necessary to specify the primary key in addition to the sorting key. #### SAMPLE BY From fe637452ec730db224e40a5c4a399d9ff7ac4ca0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Aug 2024 19:54:12 +0200 Subject: [PATCH 012/114] Revert "Fix test `01079_bad_alters_zookeeper_long`" --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 80a7e862f72..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6340,7 +6340,7 @@ void StorageReplicatedMergeTree::alter( "Metadata on replica is not up to date with common metadata in Zookeeper. " "It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). " - "You can retry the query"); + "You can retry this error"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5fb892597f7..01c2937352f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -59,7 +59,6 @@ MESSAGES_TO_RETRY = [ "is already started to be removing by another replica right now", # This is from LSan, and it indicates its own internal problem: "Unable to get registers from thread", - "You can retry", ] MAX_RETRIES = 3 From 2ad50a5f3c6a1da2e33aee32051d654e789b8ca3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Aug 2024 19:56:22 +0200 Subject: [PATCH 013/114] Update 01079_bad_alters_zookeeper_long.sh --- tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh index 82b8be65af5..39e65af039b 100755 --- a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh @@ -26,6 +26,10 @@ while [[ $($CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='00000000 sleep 1 done +while [[ $($CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE type='ALTER_METADATA' AND database = '$CLICKHOUSE_DATABASE'" 2>&1) ]]; do + sleep 1 +done + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE table_for_bad_alters;" # Type changed, but we can revert back $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_bad_alters VALUES(2, 2, 7)" From 434458cc830d2ced68f1f96dcfa13f967c9bc74e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 16 Aug 2024 11:22:22 +0000 Subject: [PATCH 014/114] Remove -n / --multiquery --- .../00115_shard_in_incomplete_result.sh | 2 +- .../0_stateless/00550_join_insert_select.sh | 2 +- .../0_stateless/01086_window_view_cleanup.sh | 2 +- ...396_inactive_replica_cleanup_nodes_zookeeper.sh | 4 ++-- .../0_stateless/01455_opentelemetry_distributed.sh | 2 +- .../0_stateless/01509_format_raw_blob.reference | 4 ++-- tests/queries/0_stateless/01509_format_raw_blob.sh | 4 ++-- .../01565_query_loop_after_client_error.expect | 2 +- .../01811_storage_buffer_flush_parameters.sh | 6 +++--- ...3_correct_block_size_prediction_with_default.sh | 4 ++-- 
.../02020_alter_table_modify_comment.sh | 2 +- ...e_sorting_by_input_stream_properties_explain.sh | 4 ++-- .../0_stateless/02383_join_and_filtering_set.sh | 2 +- .../0_stateless/02423_ddl_for_opentelemetry.sh | 2 +- tests/queries/0_stateless/02539_settings_alias.sh | 4 ++-- .../02697_stop_reading_on_first_cancel.sh | 2 +- .../0_stateless/02703_row_policies_for_asterisk.sh | 2 +- .../02703_row_policies_for_database_combination.sh | 2 +- .../0_stateless/02703_row_policy_for_database.sh | 4 ++-- tests/queries/0_stateless/02724_delay_mutations.sh | 6 +++--- ...02765_queries_with_subqueries_profile_events.sh | 14 +++++++------- .../queries/0_stateless/02841_not_ready_set_bug.sh | 4 ++-- .../0_stateless/02871_peak_threads_usage.sh | 14 +++++++------- .../0_stateless/02911_backup_restore_keeper_map.sh | 6 +++--- .../0_stateless/02968_file_log_multiple_read.sh | 4 ++-- .../03002_part_log_rmt_fetch_merge_error.sh | 8 ++++---- .../03002_part_log_rmt_fetch_mutate_error.sh | 10 +++++----- .../03164_selects_with_pk_usage_profile_event.sh | 8 ++++---- .../0_stateless/03172_system_detached_tables.sh | 4 ++-- .../03173_parallel_replicas_join_bug.sh | 2 +- 30 files changed, 68 insertions(+), 68 deletions(-) diff --git a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh index 5c3918dea9f..4916721764c 100755 --- a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh +++ b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -o errexit set -o pipefail -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS users; CREATE TABLE users (UserID UInt64) ENGINE = Log; INSERT INTO users VALUES (1468013291393583084); diff --git a/tests/queries/0_stateless/00550_join_insert_select.sh b/tests/queries/0_stateless/00550_join_insert_select.sh index bfaccb613ca..ee2f3ab286b 100755 --- a/tests/queries/0_stateless/00550_join_insert_select.sh +++ b/tests/queries/0_stateless/00550_join_insert_select.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --ignore-error --query=" +$CLICKHOUSE_CLIENT --ignore-error --query=" DROP TABLE IF EXISTS test1_00550; DROP TABLE IF EXISTS test2_00550; DROP TABLE IF EXISTS test3_00550; diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index 8b8e794c8ff..1bfa3c50869 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -13,7 +13,7 @@ opts=( DATABASE_ORDINARY="${CLICKHOUSE_DATABASE}_ordinary" -$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 --multiquery " +$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " SET allow_experimental_window_view = 1; SET window_view_clean_interval = 1; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..4a0b6a8c93c 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -13,7 +13,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") SCALE=1000 -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '1{replica}') ORDER BY x @@ -46,7 +46,7 @@ $CLICKHOUSE_CLIENT --receive_timeout 600 --query "SYSTEM SYNC REPLICA r2" # Need $CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/$SHARD/replicas/2$REPLICA' AND name = 'is_lost'"; -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; " diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 2b6da6132ed..30940f93a56 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function check_log { -${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +${CLICKHOUSE_CLIENT} --format=JSONEachRow -q " set enable_analyzer = 1; system flush logs; diff --git a/tests/queries/0_stateless/01509_format_raw_blob.reference b/tests/queries/0_stateless/01509_format_raw_blob.reference index 05014001bd9..eb074457e07 100644 --- a/tests/queries/0_stateless/01509_format_raw_blob.reference +++ b/tests/queries/0_stateless/01509_format_raw_blob.reference @@ -1,2 +1,2 @@ -9fd46251e5574c633cbfbb9293671888 - -9fd46251e5574c633cbfbb9293671888 - +48fad37bc89fc3bcc29c4750897b6709 - +48fad37bc89fc3bcc29c4750897b6709 - diff --git a/tests/queries/0_stateless/01509_format_raw_blob.sh b/tests/queries/0_stateless/01509_format_raw_blob.sh index 3d1d3fbb17b..355928014e8 100755 --- a/tests/queries/0_stateless/01509_format_raw_blob.sh +++ b/tests/queries/0_stateless/01509_format_raw_blob.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t; CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; " @@ -12,7 +12,7 @@ CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT RawBLOB" < ${BASH_SOURCE[0]} cat ${BASH_SOURCE[0]} | md5sum -${CLICKHOUSE_CLIENT} -n --query "SELECT * FROM t FORMAT RawBLOB" | md5sum +${CLICKHOUSE_CLIENT} --query "SELECT * FROM t FORMAT RawBLOB" | md5sum ${CLICKHOUSE_CLIENT} --query " DROP TABLE t; diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index 6253840c63c..f08ef911da4 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,7 +24,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -m --history_file=$history_file --highlight 0" expect "\n:) " send -- "DROP TABLE IF EXISTS t01565;\r" diff --git a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh index 6a5949741ab..7878867e159 100755 --- a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh +++ b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh @@ -17,7 +17,7 @@ function wait_with_limit() done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_01811; drop table if exists buffer_01811; @@ -39,9 +39,9 @@ $CLICKHOUSE_CLIENT -nm -q " # wait for background buffer flush wait_with_limit 30 '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01811") -gt 0 ]]' -$CLICKHOUSE_CLIENT -nm -q "select count() from data_01811" +$CLICKHOUSE_CLIENT -m -q "select count() from data_01811" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table buffer_01811; drop table data_01811; " diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh index 1482730af2c..57f9b5595de 100755 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) sql="toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]))" # Create the table and fill it -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=(\"[^\"]*?\"|[^\",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" @@ -24,7 +24,7 @@ function test() $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM 
test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 uuid_2=$(cat /proc/sys/kernel/random/uuid) $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" SYSTEM FLUSH LOGS; WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh index 3448f052f51..fa2d84e131a 100755 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh @@ -16,7 +16,7 @@ function test_table_comments() local ENGINE_NAME="$1" echo "engine : ${ENGINE_NAME}" - $CLICKHOUSE_CLIENT -nm <&1 | grep -q "SYNTAX_ERROR" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " CREATE TABLE 02703_rqtable_default (x UInt8) ENGINE = MergeTree ORDER BY x; CREATE ROW POLICY ${CLICKHOUSE_DATABASE}_filter_11_db_policy ON * USING x=1 AS permissive TO ALL; diff --git a/tests/queries/0_stateless/02724_delay_mutations.sh b/tests/queries/0_stateless/02724_delay_mutations.sh index f349e29253a..7843e692822 100755 --- a/tests/queries/0_stateless/02724_delay_mutations.sh +++ b/tests/queries/0_stateless/02724_delay_mutations.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_delay_mutations SYNC; CREATE TABLE t_delay_mutations (id UInt64, v UInt64) @@ -36,14 +36,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_delay_mutations" wait_for_mutation "t_delay_mutations" "mutation_5.txt" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_delay_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done; DROP TABLE IF EXISTS t_delay_mutations SYNC; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SYSTEM FLUSH LOGS; SELECT diff --git a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh index b7d93b5396c..fd64e8d8cb8 100755 --- a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS mv; DROP TABLE IF EXISTS output; DROP TABLE IF EXISTS input; @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -n -q " for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "INSERT INTO input SELECT * FROM numbers(1)" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 view, @@ -35,7 +35,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "SELECT * FROM system.one WHERE dummy IN (SELECT * FROM system.one) FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 subquery, @@ -52,7 +52,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE, @@ -69,7 +69,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x, x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE_Multi, @@ -86,7 +86,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE, @@ -103,7 +103,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x UNION ALL SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE_Multi, diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index 556e2f52de2..d5a2d034014 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE 1 in (SELECT number fro $CLICKHOUSE_CLIENT -q "SELECT xor(1, 0) FROM system.parts WHERE 1 IN (SELECT 1) FORMAT Null" # (Not all of these tests are effective because some of these tables are empty.) 
-$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " select * from system.columns where table in (select '123'); select * from system.replicas where database in (select '123'); select * from system.data_skipping_indices where database in (select '123'); @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -nq " select * from system.replication_queue where database in (select '123'); select * from system.distribution_queue where database in (select '123'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " create table a (x Int8) engine MergeTree order by x; insert into a values (1); select * from mergeTreeIndex(currentDatabase(), 'a') where part_name in (select '123'); diff --git a/tests/queries/0_stateless/02871_peak_threads_usage.sh b/tests/queries/0_stateless/02871_peak_threads_usage.sh index dfb3e665020..0f0473bbb47 100755 --- a/tests/queries/0_stateless/02871_peak_threads_usage.sh +++ b/tests/queries/0_stateless/02871_peak_threads_usage.sh @@ -26,7 +26,7 @@ ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_6" --query='SELECT * FROM nu ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_7" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 1, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_8" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 4, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 200000) @@ -38,7 +38,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 1""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 2000) @@ -50,7 +50,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(1, 1) @@ -62,20 +62,20 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(1, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -m --query=""" SELECT sum(number) FROM numbers_mt(100000) GROUP BY number % 2 WITH TOTALS ORDER BY number % 2 SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH 
LOGS" for i in {1..14} do - ${CLICKHOUSE_CLIENT} -mn --query=""" + ${CLICKHOUSE_CLIENT} -m --query=""" SELECT '${i}', peak_threads_usage, (select count() from system.query_thread_log WHERE system.query_thread_log.query_id = '${UNIQUE_QUERY_ID}_${i}' AND current_database = currentDatabase()) = length(thread_ids), diff --git a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh index c04667505c3..01aba244a02 100755 --- a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh +++ b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh database_name="$CLICKHOUSE_DATABASE"_02911_keeper_map -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP DATABASE IF EXISTS $database_name; CREATE DATABASE $database_name; CREATE TABLE $database_name.02911_backup_restore_keeper_map1 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911') PRIMARY KEY key; @@ -13,9 +13,9 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE $database_name.02911_backup_restore_keeper_map3 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911_different') PRIMARY KEY key; " -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" backup_path="$database_name" for i in $(seq 1 3); do diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index d9bae05270a..0879bf02d60 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -15,7 +15,7 @@ do echo $i >> ${logs_dir}/a.txt done -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS file_log; DROP TABLE IF EXISTS table_to_store_data; DROP TABLE IF EXISTS file_log_mv; @@ -69,7 +69,7 @@ done ${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE file_log; DROP TABLE table_to_store_data; DROP TABLE file_log_mv; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh index e58c542b8ac..185e46a2eac 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh @@ -11,13 +11,13 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do # too frequent FLUSH LOGS is too costly sleep 2 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -nm -q " optimize table rmt_master final settings 
alter_sync=1, optimize_throw_if_noop=1; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; @@ -42,7 +42,7 @@ $CLICKHOUSE_CLIENT -nm -q " " # wait until rmt_slave will fetch the part and reflect this error in system.part_log wait_until "system flush logs; select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh index cc8f53aafb9..e731d51e7e3 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh @@ -14,13 +14,13 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do # too frequent FLUSH LOGS is too costly sleep 2 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -41,10 +41,10 @@ $CLICKHOUSE_CLIENT -nm -q " # the part, and rmt_slave will consider it instead of performing mutation on # it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored wait_for_mutation rmt_master 0000000000 -$CLICKHOUSE_CLIENT -nm -q "system start pulling replication log rmt_slave" +$CLICKHOUSE_CLIENT -m -q "system start pulling replication log rmt_slave" # and wait until rmt_slave to fetch the part and reflect this error in system.part_log wait_until "system flush logs; select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; @@ -52,7 +52,7 @@ $CLICKHOUSE_CLIENT -nm -q " system start replicated sends rmt_master; " wait_for_mutation rmt_slave 0000000000 -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh index 29d4c877909..75efc3f057a 100755 --- a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -50,7 +50,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col2 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -67,7 +67,7 @@ $CLICKHOUSE_CLIENT -mn -q " 
query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE pk >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -84,7 +84,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col1 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sh b/tests/queries/0_stateless/03172_system_detached_tables.sh index 47775abcc45..60e913b62a8 100755 --- a/tests/queries/0_stateless/03172_system_detached_tables.sh +++ b/tests/queries/0_stateless/03172_system_detached_tables.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DATABASE_ATOMIC="${CLICKHOUSE_DATABASE}_atomic" DATABASE_LAZY="${CLICKHOUSE_DATABASE}_lazy" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT 'database atomic tests'; DROP DATABASE IF EXISTS ${DATABASE_ATOMIC}; @@ -36,7 +36,7 @@ DROP DATABASE ${DATABASE_ATOMIC} SYNC; " -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT '-----------------------'; SELECT 'database lazy tests'; diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index af702569794..d2be9899f86 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT -q " INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " SET enable_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; SELECT From 539d04c90f30efa0ef6435373ec8ffc4777aee78 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 20 Aug 2024 20:00:23 +0000 Subject: [PATCH 015/114] Disable min_bytes_to_use_direct_io in some tests with Dynamic/JSON subcolumns because it's broken --- .../03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql | 2 ++ .../03036_dynamic_read_subcolumns_wide_merge_tree.sql | 2 ++ .../03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 | 2 ++ ...03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 | 2 ++ 4 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql index 61dc8fca01a..9e6e0652127 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql @@ -4,6 +4,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git 
a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..44ceac1e5ad 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -4,6 +4,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 index ab4e0437c15..b31e57753c0 100644 --- a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -5,6 +5,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set session_timezone = 'UTC'; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 index 3010fa0e2de..c0f34a8ea61 100644 --- a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -4,6 +4,8 @@ set allow_experimental_json_type = 1; set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; insert into test select number, '{}' from numbers(10000); From 1626589bb35eaca62804eabc5f64d5d55c998edf Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 00:48:23 +0300 Subject: [PATCH 016/114] feat: add ripeMD160 support --- src/Common/RipeMD160Hash.h | 186 +++++++++++++++++++++++++ src/Functions/FunctionsHashing.h | 16 +++ src/Functions/FunctionsHashingRipe.cpp | 21 +++ 3 files changed, 223 insertions(+) create mode 100644 src/Common/RipeMD160Hash.h create mode 100644 src/Functions/FunctionsHashingRipe.cpp diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h new file mode 100644 index 00000000000..0ff97dc13b2 --- /dev/null +++ b/src/Common/RipeMD160Hash.h @@ -0,0 +1,186 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +class RIPEMD160 +{ +private: + UInt8 digest_bytes[20]; + + static constexpr UInt32 initial_digest[5] = {0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL}; + + static constexpr UInt8 rho[16] = {0x7, 0x4, 0xd, 0x1, 0xa, 0x6, 0xf, 0x3, 0xc, 0x0, 0x9, 0x5, 0x2, 0xe, 0xb, 0x8}; + + static constexpr UInt8 shifts[80] + = {11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 
8, 12, 13, 11, 15, 6, 9, 9, 7, 12, 15, 11, + 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, + 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; + + static constexpr UInt32 constants_left[5] = {0x00000000UL, 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xa953fd4eUL}; + + + static ALWAYS_INLINE UInt32 make_le32(UInt32 x) noexcept + { + if constexpr (std::endian::native == std::endian::little) + { + return x; + } + else + { + return __builtin_bswap32(x); + } + } + + static constexpr UInt32 constants_right[5] = {0x50a28be6UL, 0x5c4dd124UL, 0x6d703ef3UL, 0x7a6d76e9UL, 0x00000000UL}; + + static constexpr UInt8 fns_left[5] = {1, 2, 3, 4, 5}; + static constexpr UInt8 fns_right[5] = {5, 4, 3, 2, 1}; + + static ALWAYS_INLINE UInt32 rol(UInt32 x, UInt32 n) noexcept { return (x << n) | (x >> (32 - n)); } + + static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } + + static ALWAYS_INLINE UInt32 F_2(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & b) | (~a & c)); } + + static ALWAYS_INLINE UInt32 F_3(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a | ~b) ^ c); } + + static ALWAYS_INLINE UInt32 F_4(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & c) | (b & ~c)); } + + static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } + + using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); + + static constexpr FuncPtr funcs[5] = {F_1, F_2, F_3, F_4, F_5}; + + static ALWAYS_INLINE FuncPtr getFunction(UInt8 func) noexcept { return funcs[func - 1]; } + + void compute_line( + UInt32 * digest, + UInt32 * words, + const UInt32 * chunk, + UInt8 * index, + const UInt8 * sh, + const UInt32 * ks, + const UInt8 * fns) noexcept + { + std::memcpy(words, digest, 5 * sizeof(UInt32)); + for (UInt8 round = 0; round < 5; ++round) + { + UInt32 k = ks[round]; + UInt8 fn = fns[round]; + for (UInt8 j = 0; j < 16; ++j) + { + UInt32 tmp = getFunction(fn)(words[1], words[2], words[3]); + tmp += words[0] + le32toh(chunk[index[j]]) + k; + tmp = rol(tmp, sh[index[j]]) + words[4]; + words[0] = words[4]; + words[4] = words[3]; + words[3] = rol(words[2], 10); + words[2] = words[1]; + words[1] = tmp; + } + sh += 16; + UInt8 index_tmp[16]; + for (size_t i = 0; i < 16; ++i) + index_tmp[i] = rho[index[i]]; + std::memcpy(index, index_tmp, 16); + } + } + + /// Update the digest with the given chunk of data + void update(UInt32 * digest, const UInt32 * chunk) noexcept + { + UInt8 index[16]; + for (UInt8 i = 0; i < 16; ++i) + index[i] = i; + + UInt32 words_left[5]; + compute_line(digest, words_left, chunk, index, shifts, constants_left, fns_left); + + static constexpr UInt8 rho_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; + std::memcpy(index, rho_index, 16); + + UInt32 words_right[5]; + compute_line(digest, words_right, chunk, index, shifts, constants_right, fns_right); + + digest[0] += words_left[1] + words_right[2]; + digest[1] += words_left[2] + words_right[3]; + digest[2] += words_left[3] + words_right[4]; + digest[3] += words_left[4] + words_right[0]; + digest[4] += words_left[0] + words_right[1]; + + std::rotate(digest, digest + 1, digest + 5); + } + +public: + void hash(const UInt8 * data, size_t data_len) noexcept + { + UInt32 digest[5]; + for (size_t i = 0; i < 5; ++i) + digest[i] = make_le32(initial_digest[i]); + + const UInt8 * last_chunk_start = data + (data_len & (~0x3f)); + while (data < last_chunk_start) + { + update(digest, 
reinterpret_cast<const UInt32 *>(data));
+            data += 0x40;
+        }
+
+        UInt8 last_chunk[0x40] = {};
+        UInt8 leftover_size = data_len & 0x3f;
+        std::memcpy(last_chunk, data, leftover_size);
+
+        last_chunk[leftover_size] = 0x80;
+
+        if (leftover_size >= 0x38)
+        {
+            update(digest, reinterpret_cast<const UInt32 *>(last_chunk));
+            std::memset(last_chunk, 0, 0x38);
+        }
+
+        UInt32 data_len_bits = static_cast<UInt32>(data_len << 3);
+        std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits));
+        data_len_bits = static_cast<UInt32>(data_len >> 29);
+        std::memcpy(&last_chunk[0x3c], &data_len_bits, sizeof(data_len_bits));
+
+        update(digest, reinterpret_cast<const UInt32 *>(last_chunk));
+
+        for (size_t i = 0; i < 5; ++i)
+        {
+            UInt32 digest_part = make_le32(digest[i]);
+            std::memcpy(digest_bytes + i * 4, &digest_part, 4);
+        }
+    }
+
+    const UInt8 * get_digest_bytes() const noexcept { return digest_bytes; }
+};
+
+
+inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept
+{
+    RIPEMD160 ripe;
+    ripe.hash(reinterpret_cast<const UInt8 *>(data), size);
+
+    UInt8 digest[20];
+    std::memcpy(digest, ripe.get_digest_bytes(), sizeof(digest));
+
+    std::reverse(digest, digest + sizeof(digest));
+
+    UInt256 res = 0;
+    std::memcpy(&res, digest, sizeof(digest));
+
+    return res;
+}
diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 95c54ac9528..8829e7c0479 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -14,6 +14,7 @@
 #include
 #include
+#include <Common/RipeMD160Hash.h>
 #include
 #include
 #include
@@ -190,6 +191,19 @@ T combineHashesFunc(T t1, T t2)
     return HashFunction::apply(reinterpret_cast<const char *>(hashes), sizeof(hashes));
 }
 
+struct RipeMD160Impl
+{
+    static constexpr auto name = "ripeMD160";
+
+    using ReturnType = UInt256;
+
+    static UInt256 apply(const char * begin, size_t size) { return ripeMD160Hash(begin, size); }
+
+    static UInt256 combineHashes(UInt256 h1, UInt256 h2) { return combineHashesFunc<UInt256, RipeMD160Impl>(h1, h2); }
+
+    static constexpr bool use_int_hash_for_pods = false;
+};
+
 
 struct SipHash64Impl
 {
@@ -1646,6 +1660,8 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
 using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
 using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
+
+using FunctionRipeMD160Hash = FunctionAnyHash<RipeMD160Impl>;
 }
 
 #pragma clang diagnostic pop
diff --git a/src/Functions/FunctionsHashingRipe.cpp b/src/Functions/FunctionsHashingRipe.cpp
new file mode 100644
index 00000000000..5b06b8ab924
--- /dev/null
+++ b/src/Functions/FunctionsHashingRipe.cpp
@@ -0,0 +1,21 @@
+#include "FunctionsHashing.h"
+
+#include <Functions/FunctionFactory.h>
+
+/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
+/// to better parallelize the build procedure and avoid MSan build failure
+/// due to excessive resource consumption.
+namespace DB
+{
+REGISTER_FUNCTION(HashingRipe)
+{
+    factory.registerFunction<FunctionRipeMD160Hash>(FunctionDocumentation{
+        .description = "RIPEMD-160 hash function, primarily used in Bitcoin address generation.",
+        .examples{{"", "SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));", R"(
+            ┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+            │ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B                       │
+            └───────────────────────────────────────────────────────────────┘
+            )"}},
+        .categories{"Hash"}});
+}
+}

From 5740df58b90f4b2f5532bd6b5ab5dc77a838a0a7 Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 01:17:40 +0300
Subject: [PATCH 017/114] feat: add test

---
 tests/queries/0_stateless/03222_ripeMD160.reference |  5 +++++
 tests/queries/0_stateless/03222_ripeMD160.sql       | 11 +++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 tests/queries/0_stateless/03222_ripeMD160.reference
 create mode 100644 tests/queries/0_stateless/03222_ripeMD160.sql

diff --git a/tests/queries/0_stateless/03222_ripeMD160.reference b/tests/queries/0_stateless/03222_ripeMD160.reference
new file mode 100644
index 00000000000..f0db55dc8e1
--- /dev/null
+++ b/tests/queries/0_stateless/03222_ripeMD160.reference
@@ -0,0 +1,5 @@
+37F332F68DB77BD9D7EDD4969571AD671CF9DD3B
+132072DF690933835EB8B6AD0B77E7B6F14ACAD7
+9C1185A5C5E9FC54612808977EE8F548B2258D31
+13920F39C93D503A0AC02EAB9AA8F672BC523ADA
+3FEDF0C212CCFA54C0EBA676C8A8A2A10BC218BE
diff --git a/tests/queries/0_stateless/03222_ripeMD160.sql b/tests/queries/0_stateless/03222_ripeMD160.sql
new file mode 100644
index 00000000000..592f9f830dd
--- /dev/null
+++ b/tests/queries/0_stateless/03222_ripeMD160.sql
@@ -0,0 +1,11 @@
+-- Output can be verified using: https://emn178.github.io/online-tools/ripemd-160/
+
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy cog'));
+
+SELECT hex(ripeMD160(''));
+
+SELECT hex(ripeMD160('CheREpaha1512'));
+
+SELECT hex(ripeMD160('A-very-long-string-that-should-be-hashed-using-ripeMD160'));

From bb2b6600961e432d115c08964d65ade23740861a Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 02:11:08 +0300
Subject: [PATCH 018/114] fix: cosmetic

---
 src/Common/RipeMD160Hash.h | 151 +++++++++++++++++++------------------
 1 file changed, 78 insertions(+), 73 deletions(-)

diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h
index 0ff97dc13b2..a759553278c 100644
--- a/src/Common/RipeMD160Hash.h
+++ b/src/Common/RipeMD160Hash.h
@@ -14,42 +14,33 @@
 #include
 
+/// https://homes.esat.kuleuven.be/~bosselae/ripemd160/pdf/AB-9601/AB-9601.pdf
+/// https://en.wikipedia.org/wiki/RIPEMD
 
-class RIPEMD160
+class RipeMD160
 {
 private:
+    using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32);
+
+    /// Stores the final 20-byte (160-bit) hash result
     UInt8 digest_bytes[20];
 
-    static constexpr UInt32 initial_digest[5] = {0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL};
+    static constexpr UInt32 initial_hash_values[5] = {0x67452301UL, 0xEFCDAB89UL, 0x98BADCFEUL, 0x10325476UL, 0xC3D2E1F0UL};
 
-    static constexpr UInt8 rho[16] = {0x7, 0x4, 0xd, 0x1, 0xa, 0x6, 0xf, 0x3, 0xc, 0x0, 0x9, 0x5, 0x2, 0xe, 0xb, 0x8};
+    static constexpr UInt8 rho_order[16] = {0x7, 0x4, 0xD, 0x1, 0xA, 0x6, 0xF, 0x3, 0xC, 0x0, 0x9, 0x5, 0x2, 0xE, 0xB, 0x8};
 
-    static constexpr UInt8 shifts[80]
+    static constexpr UInt8 shift_amounts[80]
        = {11, 14, 15, 12, 5,  8,  7,  9,  11, 13, 14, 15, 6,  7,  9,  8,  12, 13, 11, 15, 6,  9,  9,  7,  12, 15, 11, 13, 7,  8,  7,  7,  13, 15, 14, 11,
7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - static constexpr UInt32 constants_left[5] = {0x00000000UL, 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xa953fd4eUL}; + static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; - - static ALWAYS_INLINE UInt32 make_le32(UInt32 x) noexcept - { - if constexpr (std::endian::native == std::endian::little) - { - return x; - } - else - { - return __builtin_bswap32(x); - } - } + static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; - static constexpr UInt32 constants_right[5] = {0x50a28be6UL, 0x5c4dd124UL, 0x6d703ef3UL, 0x7a6d76e9UL, 0x00000000UL}; + static constexpr UInt8 left_function_order[5] = {1, 2, 3, 4, 5}; - static constexpr UInt8 fns_left[5] = {1, 2, 3, 4, 5}; - static constexpr UInt8 fns_right[5] = {5, 4, 3, 2, 1}; - - static ALWAYS_INLINE UInt32 rol(UInt32 x, UInt32 n) noexcept { return (x << n) | (x >> (32 - n)); } + static constexpr UInt8 right_function_order[5] = {5, 4, 3, 2, 1}; static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } @@ -61,68 +52,82 @@ private: static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } - using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); + static constexpr FuncPtr hash_functions[5] = {F_1, F_2, F_3, F_4, F_5}; - static constexpr FuncPtr funcs[5] = {F_1, F_2, F_3, F_4, F_5}; + static ALWAYS_INLINE FuncPtr get_function(UInt8 function_id) noexcept { return hash_functions[function_id - 1]; } - static ALWAYS_INLINE FuncPtr getFunction(UInt8 func) noexcept { return funcs[func - 1]; } - - void compute_line( - UInt32 * digest, - UInt32 * words, - const UInt32 * chunk, - UInt8 * index, - const UInt8 * sh, - const UInt32 * ks, - const UInt8 * fns) noexcept + static ALWAYS_INLINE UInt32 convert_to_little_endian(UInt32 x) noexcept { - std::memcpy(words, digest, 5 * sizeof(UInt32)); + if constexpr (std::endian::native == std::endian::little) + { + return x; + } + else + { + return __builtin_bswap32(x); + } + } + + static ALWAYS_INLINE UInt32 rotate_left(UInt32 value, UInt32 shift) noexcept { return (value << shift) | (value >> (32 - shift)); } + + /// Performs one full pass (5 rounds) of RIPEMD-160 algorithm for one path (left or right) + void process_rounds( + UInt32 * current_digest, + UInt32 * temp_words, + const UInt32 * data_chunk, + UInt8 * index_order, + const UInt8 * shift_values, + const UInt32 * round_constants, + const UInt8 * function_order) noexcept + { + std::memcpy(temp_words, current_digest, 5 * sizeof(UInt32)); for (UInt8 round = 0; round < 5; ++round) { - UInt32 k = ks[round]; - UInt8 fn = fns[round]; + UInt32 k = round_constants[round]; + UInt8 fn = function_order[round]; for (UInt8 j = 0; j < 16; ++j) { - UInt32 tmp = getFunction(fn)(words[1], words[2], words[3]); - tmp += words[0] + le32toh(chunk[index[j]]) + k; - tmp = rol(tmp, sh[index[j]]) + words[4]; - words[0] = words[4]; - words[4] = words[3]; - words[3] = rol(words[2], 10); - words[2] = words[1]; - words[1] = tmp; + UInt32 temp_result = get_function(fn)(temp_words[1], temp_words[2], temp_words[3]); + temp_result += temp_words[0] + convert_to_little_endian(data_chunk[index_order[j]]) + k; + temp_result = rotate_left(temp_result, shift_values[index_order[j]]) + temp_words[4]; + temp_words[0] = temp_words[4]; + temp_words[4] = 
temp_words[3]; + temp_words[3] = rotate_left(temp_words[2], 10); + temp_words[2] = temp_words[1]; + temp_words[1] = temp_result; } - sh += 16; - UInt8 index_tmp[16]; + shift_values += 16; + UInt8 reordered_index[16]; for (size_t i = 0; i < 16; ++i) - index_tmp[i] = rho[index[i]]; - std::memcpy(index, index_tmp, 16); + reordered_index[i] = rho_order[index_order[i]]; + std::memcpy(index_order, reordered_index, 16); } } /// Update the digest with the given chunk of data - void update(UInt32 * digest, const UInt32 * chunk) noexcept + void update_digest(UInt32 * current_digest, const UInt32 * data_chunk) noexcept { - UInt8 index[16]; + UInt8 index_order[16]; for (UInt8 i = 0; i < 16; ++i) - index[i] = i; + index_order[i] = i; - UInt32 words_left[5]; - compute_line(digest, words_left, chunk, index, shifts, constants_left, fns_left); + UInt32 left_path_words[5]; + process_rounds(current_digest, left_path_words, data_chunk, index_order, shift_amounts, left_round_constants, left_function_order); - static constexpr UInt8 rho_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; - std::memcpy(index, rho_index, 16); + static constexpr UInt8 rho_reordered_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; + std::memcpy(index_order, rho_reordered_index, 16); - UInt32 words_right[5]; - compute_line(digest, words_right, chunk, index, shifts, constants_right, fns_right); + UInt32 right_path_words[5]; + process_rounds( + current_digest, right_path_words, data_chunk, index_order, shift_amounts, right_round_constants, right_function_order); - digest[0] += words_left[1] + words_right[2]; - digest[1] += words_left[2] + words_right[3]; - digest[2] += words_left[3] + words_right[4]; - digest[3] += words_left[4] + words_right[0]; - digest[4] += words_left[0] + words_right[1]; + current_digest[0] += left_path_words[1] + right_path_words[2]; + current_digest[1] += left_path_words[2] + right_path_words[3]; + current_digest[2] += left_path_words[3] + right_path_words[4]; + current_digest[3] += left_path_words[4] + right_path_words[0]; + current_digest[4] += left_path_words[0] + right_path_words[1]; - std::rotate(digest, digest + 1, digest + 5); + std::rotate(current_digest, current_digest + 1, current_digest + 5); } public: @@ -130,37 +135,37 @@ public: { UInt32 digest[5]; for (size_t i = 0; i < 5; ++i) - digest[i] = make_le32(initial_digest[i]); + digest[i] = convert_to_little_endian(initial_hash_values[i]); - const UInt8 * last_chunk_start = data + (data_len & (~0x3f)); + const UInt8 * last_chunk_start = data + (data_len & (~0x3F)); while (data < last_chunk_start) { - update(digest, reinterpret_cast(data)); + update_digest(digest, reinterpret_cast(data)); data += 0x40; } UInt8 last_chunk[0x40] = {}; - UInt8 leftover_size = data_len & 0x3f; + UInt8 leftover_size = data_len & 0x3F; std::memcpy(last_chunk, data, leftover_size); last_chunk[leftover_size] = 0x80; if (leftover_size >= 0x38) { - update(digest, reinterpret_cast(last_chunk)); + update_digest(digest, reinterpret_cast(last_chunk)); std::memset(last_chunk, 0, 0x38); } UInt32 data_len_bits = static_cast(data_len << 3); std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits)); data_len_bits = static_cast(data_len >> 29); - std::memcpy(&last_chunk[0x3c], &data_len_bits, sizeof(data_len_bits)); + std::memcpy(&last_chunk[0x3C], &data_len_bits, sizeof(data_len_bits)); - update(digest, reinterpret_cast(last_chunk)); + update_digest(digest, reinterpret_cast(last_chunk)); for (size_t i = 0; i < 5; ++i) { - UInt32 digest_part = 
make_le32(digest[i]); + UInt32 digest_part = convert_to_little_endian(digest[i]); std::memcpy(digest_bytes + i * 4, &digest_part, 4); } } @@ -171,7 +176,7 @@ public: inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept { - RIPEMD160 ripe; + RipeMD160 ripe; ripe.hash(reinterpret_cast(data), size); UInt8 digest[20]; From a0d29c812c4da7c7e8d15e798e141e7e6be910b9 Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 02:47:23 +0300 Subject: [PATCH 019/114] fix: cosmetic --- src/Common/RipeMD160Hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h index a759553278c..b6488225974 100644 --- a/src/Common/RipeMD160Hash.h +++ b/src/Common/RipeMD160Hash.h @@ -34,7 +34,7 @@ private: 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; + static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; From e01a448bcc62a7e292766cddc0c817b9e44558d4 Mon Sep 17 00:00:00 2001 From: Zhigao Hong Date: Wed, 21 Aug 2024 15:35:33 +0800 Subject: [PATCH 020/114] Fix invalid characters in replica_name --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 44548e33d46..9a65d590453 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -538,6 +538,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replica_name.empty()) throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); + // '\t' and '\n' will interrupt parsing 'source replica' in ReplicatedMergeTreeLogEntryData::readText + if (replica_name.find('\t') != String::npos || replica_name.find('\n') != String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must not contain '\\t' or '\\n'"); arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. 
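A minimal SQL sketch of what the new check in PATCH 020 rejects; the table
name and ZooKeeper path below are invented for illustration, and the '\t'
escape in the string literal produces a literal tab character:

-- Hypothetical example: with this patch the CREATE fails up front with
-- BAD_ARGUMENTS ("Replica name must not contain '\t' or '\n'") instead of
-- storing a replica name that would later break parsing of 'source replica'
-- in replication log entries.
CREATE TABLE t_bad_replica (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t_bad_replica', 'r1\tr2')
ORDER BY x;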
From 915daafd3a0c9f1539dad75dc3805e740f0bc75a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Aug 2024 10:45:48 +0000 Subject: [PATCH 021/114] Fix 01086_window_view_cleanup.sh --- tests/queries/0_stateless/01086_window_view_cleanup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index 1bfa3c50869..2e6cc7e2520 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -13,7 +13,8 @@ opts=( DATABASE_ORDINARY="${CLICKHOUSE_DATABASE}_ordinary" -$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " +$CLICKHOUSE_CLIENT "${opts[@]}" --query " + SET allow_deprecated_database_ordinary = 1; SET allow_experimental_window_view = 1; SET window_view_clean_interval = 1; @@ -28,8 +29,7 @@ $CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 2, toDateTime('1990/01/01 12:00:01', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 3, toDateTime('1990/01/01 12:00:02', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 4, toDateTime('1990/01/01 12:00:05', 'US/Samoa')); - INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa')); -" + INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" while true; do $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count(*) FROM ${DATABASE_ORDINARY}.\`.inner.wv\`" | grep -q "5" && break || sleep .5 ||: From a387807c8429af1c9ed471b80ad317c9171b40a4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:14:51 +0200 Subject: [PATCH 022/114] Fix build --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 6263351897e..c9ca9efabee 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1257,8 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam .format_name = format_name, .date_time_overflow_behavior = date_time_overflow_behavior, .allow_arrow_null_type = true, - .skip_columns_with_unsupported_types = false - .allow_inferring_nullable_columns = true; + .skip_columns_with_unsupported_types = false, + .allow_inferring_nullable_columns = true }; Columns columns; From 6db7b995439f873f9cd33d07019ea939ec51a3b7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:42:46 +0200 Subject: [PATCH 023/114] Increase connectTimeoutMs IMDS connection timeout to 50ms to avoid failures in CI --- src/IO/S3/Credentials.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index d6f7542da6b..fab3a0111eb 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -787,7 +787,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. /// But the connection timeout should be small because there is the case when there is no IMDS at all, /// like outside of the cloud, on your own machines. 
- aws_client_configuration.connectTimeoutMs = 10; + aws_client_configuration.connectTimeoutMs = 50; aws_client_configuration.requestTimeoutMs = 1000; aws_client_configuration.retryStrategy = std::make_shared(1, 1000); From 4a7a04b35b492aec779b42adc6f3f3eae354a947 Mon Sep 17 00:00:00 2001 From: leonkozlowski Date: Wed, 21 Aug 2024 10:13:02 -0400 Subject: [PATCH 024/114] patch: build From 3fd50ed856a9767094f19f08c93401fc4a5a80eb Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:23:37 +0200 Subject: [PATCH 025/114] Fix flaky test test_distributed_replica_max_ignored_errors --- tests/integration/test_distributed_load_balancing/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index d61cca6ce12..a913c2ebb49 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -200,7 +200,6 @@ def test_distributed_replica_max_ignored_errors(): "connect_timeout": 2, "receive_timeout": 2, "send_timeout": 2, - "idle_connection_timeout": 2, "tcp_keep_alive_timeout": 2, "distributed_replica_max_ignored_errors": 0, "distributed_replica_error_half_life": 60, From 1afd3a7c3a7569b172ac3238f798c7850fd41bcf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:24:43 +0200 Subject: [PATCH 026/114] give priority to parsed columns over storage columns --- src/Storages/Hive/StorageHive.cpp | 2 +- .../ObjectStorage/StorageObjectStorage.cpp | 2 +- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageSource.cpp | 4 ++-- .../StorageObjectStorageSource.h | 4 ++-- .../ObjectStorageQueueSource.cpp | 2 +- .../ObjectStorageQueueSource.h | 2 +- .../StorageObjectStorageQueue.cpp | 2 +- src/Storages/StorageFile.cpp | 6 ++--- src/Storages/StorageFileCluster.cpp | 2 +- src/Storages/StorageURL.cpp | 6 ++--- src/Storages/StorageURLCluster.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 23 +++++++++++-------- src/Storages/VirtualColumnUtils.h | 4 ++-- 14 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index ae2e8cffe28..ea2e9e3eece 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -444,8 +444,8 @@ StorageHive::StorageHive( storage_metadata.setComment(comment_); storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext())); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index d9c82d68791..a0f189e92fc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage( if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings)); + 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index c214665f7e0..08a0739d929 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index d8e26977e75..04e319cd0b8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate() .filename = &filename, .last_modified = object_info->metadata->last_modified, .etag = &(object_info->metadata->etag) - }, getContext(), read_from_format_info.columns_description); + }, getContext()); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) @@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 6681dbf4578..7ae7a2358e9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -74,7 +74,7 @@ protected: const UInt64 max_block_size; const bool need_only_count; const size_t max_parsing_threads; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; std::shared_ptr file_iterator; @@ -122,7 +122,7 @@ protected: const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 2634a7b2f1e..cde41b4afff 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl() { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes - }, getContext(), read_from_format_info.columns_description); + }, getContext()); return chunk; } diff --git 
a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index 0f3d0ab2e92..c085287e4f3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -128,7 +128,7 @@ private: const std::shared_ptr file_iterator; const ConfigurationPtr configuration; const ObjectStoragePtr object_storage; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::optional format_settings; const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 5dc3e01962c..9452ce81e9e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_)); setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 766b7722cdf..50294df32a4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings)); + setInMemoryMetadata(storage_metadata); } @@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), .last_modified = current_file_last_modified - }, getContext(), columns_description); + }, getContext()); return chunk; } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 82ae0b761ae..c01738067c4 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0])); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? 
"" : paths[0])); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6442891cf23..fc1354b780a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings)); + setInMemoryMetadata(storage_metadata); } @@ -435,7 +435,7 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - }, getContext(), columns_description); + }, getContext()); return chunk; } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 7c7a299c64e..140413d78b0 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context))); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context))); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index edf50907752..5b974cb8a22 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -129,7 +129,7 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time", "_etag"}; } -std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) +std::unordered_map parseHivePartitioningKeysAndValues(const String & path) { std::string pattern = "([^/]+)=([^/]+)/"; re2::StringPiece input_piece(path); @@ -145,34 +145,37 @@ std::unordered_map parseHivePartitioningKeysAndValues( used_keys.insert({key, value}); auto col_name = key; - while (storage_columns.has(col_name)) - col_name = "_" + col_name; key_values[col_name] = value; } return key_values; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) +VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) { VirtualColumnsDescription desc; auto add_virtual = [&](const auto & name, const auto & type) { - if (storage_columns.has(name)) + auto local_type = type; + if (storage_columns.has(name) && !context->getSettingsRef().use_hive_partitioning) return; + if (storage_columns.has(name)) + { + local_type = storage_columns.get(name).type; + storage_columns.remove(name); + } - desc.addEphemeral(name, type, ""); + desc.addEphemeral(name, local_type, ""); }; add_virtual("_path", std::make_shared(std::make_shared())); 
add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); - add_virtual("_etag", std::make_shared(std::make_shared())); if (context->getSettingsRef().use_hive_partitioning) { - auto map = parseHivePartitioningKeysAndValues(path, storage_columns); + auto map = parseHivePartitioningKeysAndValues(path); auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); for (auto & item : map) { @@ -245,11 +248,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns) + VirtualsForFileLikeStorage virtual_values, ContextPtr context) { std::unordered_map hive_map; if (context->getSettingsRef().use_hive_partitioning) - hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns); + hive_map = parseHivePartitioningKeysAndValues(virtual_values.path); for (const auto & virtual_column : requested_virtual_columns) { diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 23e16871798..6aa08b2aef2 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage( - const ColumnsDescription & storage_columns, + ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & sample_path = "", std::optional format_settings_ = std::nullopt); @@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns); + VirtualsForFileLikeStorage virtual_values, ContextPtr context); } } From 5965297d8b2f26768fb0ee13a9aeec6d7cada0c9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:35:39 +0200 Subject: [PATCH 027/114] add accidentally removed virtual column --- src/Storages/VirtualColumnUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 5b974cb8a22..bbeb9ee6643 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -172,6 +172,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); + add_virtual("_etag", std::make_shared(std::make_shared())); if (context->getSettingsRef().use_hive_partitioning) { From e87de3cfcd22870bf7aea3dfaf1607b180b2b1d8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Aug 2024 15:19:29 +0000 Subject: [PATCH 028/114] return back virtual columns to distributed tables --- src/Storages/StorageDistributed.cpp | 4 ++++ .../03228_virtual_column_merge_dist.reference | 8 +++++++ .../03228_virtual_column_merge_dist.sql | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 
tests/queries/0_stateless/03228_virtual_column_merge_dist.reference create mode 100644 tests/queries/0_stateless/03228_virtual_column_merge_dist.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index e146e95f89f..c4668159759 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,6 +290,10 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); + /// Add virtual columns from table of storage Merges. + desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); + return desc; } diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference new file mode 100644 index 00000000000..28f00bafdfe --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference @@ -0,0 +1,8 @@ +1 t_local_1 +2 t_local_2 +1 t_local_1 +2 t_local_2 +1 1 +2 1 +1 1 +2 1 diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql new file mode 100644 index 00000000000..caf00a2e407 --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; + +CREATE TABLE t_local_1 (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE TABLE t_local_2 (a UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_local_1 VALUES (1); +INSERT INTO t_local_2 VALUES (2); + +CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); +CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); + +SELECT a, _table FROM t_merge ORDER BY a; +SELECT a, _table FROM t_distr ORDER BY a; + +SELECT a, _database = currentDatabase() FROM t_merge ORDER BY a; +SELECT a, _database = currentDatabase() FROM t_distr ORDER BY a; + +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; From 2e58ac56111a075bdbaee566a4484a193a882792 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 21 Aug 2024 16:30:42 +0000 Subject: [PATCH 029/114] build fix --- src/Storages/VirtualColumnUtils.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index bbeb9ee6643..7e3e902f083 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -156,16 +156,20 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto auto add_virtual = [&](const auto & name, const auto & type) { - auto local_type = type; - if (storage_columns.has(name) && !context->getSettingsRef().use_hive_partitioning) - return; if (storage_columns.has(name)) { - local_type = storage_columns.get(name).type; + if (!context->getSettingsRef().use_hive_partitioning) + return; + + if (storage_columns.size() == 1) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name); + auto local_type = storage_columns.get(name).type; 
storage_columns.remove(name); + desc.addEphemeral(name, local_type, ""); + return; } - desc.addEphemeral(name, local_type, ""); + desc.addEphemeral(name, type, ""); }; add_virtual("_path", std::make_shared(std::make_shared())); From a52eff299eb49291e2b57f68e5b2874c7704f9d2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 19:43:45 +0200 Subject: [PATCH 030/114] fix tests --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 43 ++---- tests/integration/test_storage_hdfs/test.py | 24 ++- .../03203_hive_style_partitioning.reference | 145 +++++++++--------- .../03203_hive_style_partitioning.sh | 2 - 5 files changed, 100 insertions(+), 116 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 7e3e902f083..ca82a1ce67a 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index fbdc7f29f98..637dbd38262 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( bucket="cont", max_path=path ) ] @@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == ["Gordon"] -def test_hive_partitioning_with_two_parameters(cluster): +def test_hive_partitioning_with_all_parameters(cluster): # type: (ClickHouseCluster) -> None node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 String, column2 String" @@ -1556,35 +1556,14 @@ def test_hive_partitioning_with_two_parameters(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = 
'{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( - bucket="cont", max_path=path - ) - ] + pattern = r"DB::Exception: Cannot implement partition by all columns in a file" - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" - ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] - - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" - ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"use_hive_partitioning": 1}) def test_hive_partitioning_without_setting(cluster): @@ -1603,9 +1582,9 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fdbf7c5bacb..ad2e7084791 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1263,13 +1263,19 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" r = node1.query( - "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" + r = node1.query( + "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + settings={"use_hive_partitioning": 1}, + ) + assert r == f"Gordon\n" -def test_hive_partitioning_with_two_parameters(started_cluster): + +def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" @@ -1279,11 +1285,13 @@ def test_hive_partitioning_with_two_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - r = node1.query( - "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" + pattern = r"DB::Exception: 
Cannot implement partition by all columns in a file" + + with pytest.raises(QueryRuntimeException, match=pattern): + node1.query( + f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"use_hive_partitioning": 1}, + ) def test_hive_partitioning_without_setting(started_cluster): @@ -1301,7 +1309,7 @@ def test_hive_partitioning_without_setting(started_cluster): with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 0}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index 12ffd17c102..b5eaef7f51e 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,4 +1,14 @@ TESTING THE FILE HIVE PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -10,25 +20,16 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth 42 2020-01-01 [1,2,3] 42.42 Array(Int64) LowCardinality(Float64) @@ -37,10 +38,20 @@ Array(Int64) LowCardinality(Float64) 4081 2070 2070 -1 -1 b +1 +1 TESTING THE URL PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -52,26 +63,18 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth 1 TESTING THE S3 PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -83,39 +86,35 @@ Eugenia Greer Elizabeth Jeffery 
Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 5a0bd482985..41b215578f0 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -13,8 +13,6 @@ set use_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; From 74d8971432158a312777dcfba229513bfd876acc Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 22:06:52 +0300 Subject: [PATCH 031/114] fix: use OpenSSL RIPEMD160 impl --- src/Common/RipeMD160Hash.h | 191 ------------------------------- src/Functions/FunctionsHashing.h | 23 +++- 2 files changed, 19 insertions(+), 195 deletions(-) delete mode 100644 src/Common/RipeMD160Hash.h diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h deleted file mode 100644 index b6488225974..00000000000 --- a/src/Common/RipeMD160Hash.h +++ /dev/null @@ -1,191 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/// https://homes.esat.kuleuven.be/~bosselae/ripemd160/pdf/AB-9601/AB-9601.pdf -/// https://en.wikipedia.org/wiki/RIPEMD - -class RipeMD160 -{ -private: - using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); - - /// Stores the final 20-byte (160-bit) hash result - UInt8 digest_bytes[20]; - - static constexpr UInt32 initial_hash_values[5] = {0x67452301UL, 0xEFCDAB89UL, 0x98BADCFEUL, 0x10325476UL, 
0xC3D2E1F0UL}; - - static constexpr UInt8 rho_order[16] = {0x7, 0x4, 0xD, 0x1, 0xA, 0x6, 0xF, 0x3, 0xC, 0x0, 0x9, 0x5, 0x2, 0xE, 0xB, 0x8}; - - static constexpr UInt8 shift_amounts[80] - = {11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, 12, 13, 11, 15, 6, 9, 9, 7, 12, 15, 11, - 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, - 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - - static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; - - static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; - - static constexpr UInt8 left_function_order[5] = {1, 2, 3, 4, 5}; - - static constexpr UInt8 right_function_order[5] = {5, 4, 3, 2, 1}; - - static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } - - static ALWAYS_INLINE UInt32 F_2(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & b) | (~a & c)); } - - static ALWAYS_INLINE UInt32 F_3(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a | ~b) ^ c); } - - static ALWAYS_INLINE UInt32 F_4(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & c) | (b & ~c)); } - - static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } - - static constexpr FuncPtr hash_functions[5] = {F_1, F_2, F_3, F_4, F_5}; - - static ALWAYS_INLINE FuncPtr get_function(UInt8 function_id) noexcept { return hash_functions[function_id - 1]; } - - static ALWAYS_INLINE UInt32 convert_to_little_endian(UInt32 x) noexcept - { - if constexpr (std::endian::native == std::endian::little) - { - return x; - } - else - { - return __builtin_bswap32(x); - } - } - - static ALWAYS_INLINE UInt32 rotate_left(UInt32 value, UInt32 shift) noexcept { return (value << shift) | (value >> (32 - shift)); } - - /// Performs one full pass (5 rounds) of RIPEMD-160 algorithm for one path (left or right) - void process_rounds( - UInt32 * current_digest, - UInt32 * temp_words, - const UInt32 * data_chunk, - UInt8 * index_order, - const UInt8 * shift_values, - const UInt32 * round_constants, - const UInt8 * function_order) noexcept - { - std::memcpy(temp_words, current_digest, 5 * sizeof(UInt32)); - for (UInt8 round = 0; round < 5; ++round) - { - UInt32 k = round_constants[round]; - UInt8 fn = function_order[round]; - for (UInt8 j = 0; j < 16; ++j) - { - UInt32 temp_result = get_function(fn)(temp_words[1], temp_words[2], temp_words[3]); - temp_result += temp_words[0] + convert_to_little_endian(data_chunk[index_order[j]]) + k; - temp_result = rotate_left(temp_result, shift_values[index_order[j]]) + temp_words[4]; - temp_words[0] = temp_words[4]; - temp_words[4] = temp_words[3]; - temp_words[3] = rotate_left(temp_words[2], 10); - temp_words[2] = temp_words[1]; - temp_words[1] = temp_result; - } - shift_values += 16; - UInt8 reordered_index[16]; - for (size_t i = 0; i < 16; ++i) - reordered_index[i] = rho_order[index_order[i]]; - std::memcpy(index_order, reordered_index, 16); - } - } - - /// Update the digest with the given chunk of data - void update_digest(UInt32 * current_digest, const UInt32 * data_chunk) noexcept - { - UInt8 index_order[16]; - for (UInt8 i = 0; i < 16; ++i) - index_order[i] = i; - - UInt32 left_path_words[5]; - process_rounds(current_digest, left_path_words, data_chunk, index_order, shift_amounts, left_round_constants, left_function_order); - - static constexpr UInt8 rho_reordered_index[16] 
= {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; - std::memcpy(index_order, rho_reordered_index, 16); - - UInt32 right_path_words[5]; - process_rounds( - current_digest, right_path_words, data_chunk, index_order, shift_amounts, right_round_constants, right_function_order); - - current_digest[0] += left_path_words[1] + right_path_words[2]; - current_digest[1] += left_path_words[2] + right_path_words[3]; - current_digest[2] += left_path_words[3] + right_path_words[4]; - current_digest[3] += left_path_words[4] + right_path_words[0]; - current_digest[4] += left_path_words[0] + right_path_words[1]; - - std::rotate(current_digest, current_digest + 1, current_digest + 5); - } - -public: - void hash(const UInt8 * data, size_t data_len) noexcept - { - UInt32 digest[5]; - for (size_t i = 0; i < 5; ++i) - digest[i] = convert_to_little_endian(initial_hash_values[i]); - - const UInt8 * last_chunk_start = data + (data_len & (~0x3F)); - while (data < last_chunk_start) - { - update_digest(digest, reinterpret_cast(data)); - data += 0x40; - } - - UInt8 last_chunk[0x40] = {}; - UInt8 leftover_size = data_len & 0x3F; - std::memcpy(last_chunk, data, leftover_size); - - last_chunk[leftover_size] = 0x80; - - if (leftover_size >= 0x38) - { - update_digest(digest, reinterpret_cast(last_chunk)); - std::memset(last_chunk, 0, 0x38); - } - - UInt32 data_len_bits = static_cast(data_len << 3); - std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits)); - data_len_bits = static_cast(data_len >> 29); - std::memcpy(&last_chunk[0x3C], &data_len_bits, sizeof(data_len_bits)); - - update_digest(digest, reinterpret_cast(last_chunk)); - - for (size_t i = 0; i < 5; ++i) - { - UInt32 digest_part = convert_to_little_endian(digest[i]); - std::memcpy(digest_bytes + i * 4, &digest_part, 4); - } - } - - const UInt8 * get_digest_bytes() const noexcept { return digest_bytes; } -}; - - -inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept -{ - RipeMD160 ripe; - ripe.hash(reinterpret_cast(data), size); - - UInt8 digest[20]; - std::memcpy(digest, ripe.get_digest_bytes(), sizeof(digest)); - - std::reverse(digest, digest + sizeof(digest)); - - UInt256 res = 0; - std::memcpy(&res, digest, sizeof(digest)); - - return res; -} diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 8829e7c0479..5111ee2bd90 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -14,13 +14,14 @@ #include #include -#include #include #include #include #if USE_SSL +# include # include +# include #endif #include @@ -194,12 +195,26 @@ T combineHashesFunc(T t1, T t2) struct RipeMD160Impl { static constexpr auto name = "ripeMD160"; - using ReturnType = UInt256; - static UInt256 apply(const char * begin, size_t size) { return ripeMD160Hash(begin, size); } + static UInt256 apply(const char * begin, size_t size) + { + UInt8 digest[RIPEMD160_DIGEST_LENGTH]; - static UInt256 combineHashes(UInt256 h1, UInt256 h2) { return combineHashesFunc(h1, h2); } + RIPEMD160(reinterpret_cast(begin), size, reinterpret_cast(digest)); + + std::reverse(digest, digest + RIPEMD160_DIGEST_LENGTH); + + UInt256 res = 0; + std::memcpy(&res, digest, RIPEMD160_DIGEST_LENGTH); + + return res; + } + + static UInt256 combineHashes(UInt256 h1, UInt256 h2) + { + return combineHashesFunc(h1, h2); + } static constexpr bool use_int_hash_for_pods = false; }; From 38f9ef6bc95550d727bc56627fd029741e99177c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 19:08:07 +0000 Subject: [PATCH 032/114] 
Fix ColumnVariant permutation --- src/Columns/ColumnVariant.cpp | 12 ++++++++++-- .../03228_variant_permutation_issue.reference | 4 ++++ .../03228_variant_permutation_issue.sql | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.reference create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.sql diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 28a4860b546..2fea3eca123 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1009,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray & indexes, size_t new_variants.reserve(num_variants); for (size_t i = 0; i != num_variants; ++i) { - size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); - new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + /// Check if no values from this variant were selected. + if (nested_perms[i].empty()) + { + new_variants.emplace_back(variants[i]->cloneEmpty()); + } + else + { + size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); + new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + } } /// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation. diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference new file mode 100644 index 00000000000..7b18a0c59fb --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -0,0 +1,4 @@ +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql new file mode 100644 index 00000000000..3f60d42ffbd --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -0,0 +1,18 @@ +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS test_new_json_type; +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; + +DROP TABLE test_new_json_type; + From dfe0beb53b4f0d1da50bf04e9c9e3e06f8b29ad2 Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 22:46:29 +0300 Subject: [PATCH 033/114] feat: add docs --- .../sql-reference/functions/hash-functions.md | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 7c977e7d6dc..d610e23fdda 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -688,6 +688,36 @@ SELECT kostikConsistentHash(16045690984833335023, 2); └───────────────────────────────────────────────┘ ``` +## ripeMD160 + +**Syntax** + +```sql +ripeMD160('input') +``` + +**Parameters** + +- 
`input`: Input string. [String](../data-types/string.md)
+
+**Returned value**
+
+- A [UInt256](../data-types/int-uint.md) hash value of type [FixedString(20)](../data-types/fixedstring.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+```
+
+```response
+┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
+└───────────────────────────────────────────────────────────────┘
+```
+
 ## murmurHash2_32, murmurHash2_64
 
 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
 
From 7f15f61426d07561ad0e24d946ac126961038a0c Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 22:46:55 +0300
Subject: [PATCH 034/114] feat: add docs

---
 .../sql-reference/functions/hash-functions.md |  6 +++-
 .../sql-reference/functions/hash-functions.md | 32 +++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index d610e23fdda..9b7ac8af0e3 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -689,6 +689,9 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
 ```
 
 ## ripeMD160
+Produces the [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md).
+
+
 
 **Syntax**
 
@@ -702,9 +705,10 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256](../data-types/int-uint.md) hash value of type [FixedString(20)](../data-types/fixedstring.md).
+- A [UInt256](../data-types/int-uint.md) hash value.
 
 **Example**
+Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string.
 
 Query:
 
diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md
index 98b6d8d4b17..66d77e66972 100644
--- a/docs/ru/sql-reference/functions/hash-functions.md
+++ b/docs/ru/sql-reference/functions/hash-functions.md
@@ -124,6 +124,38 @@ SELECT hex(sipHash128('foo', '\x01', 3));
 └──────────────────────────────────┘
 ```
 
+## ripeMD160
+Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md).
+
+**Синтаксис**
+
+```sql
+ripeMD160('input')
+```
+
+**Аргументы**
+
+- `input`: Строка [String](../data-types/string.md)
+
+**Возвращаемое значение**
+
+- [UInt256](../data-types/int-uint.md) хеш-значение
+
+**Пример**
+Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой.
+
+Запрос:
+
+```sql
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+```
+Результат:
+```response
+┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
+└───────────────────────────────────────────────────────────────┘
+```
+
 ## cityHash64 {#cityhash64}
 
 Генерирует 64-х битное значение [CityHash](https://github.com/google/cityhash).
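A quick way to sanity-check the OpenSSL-backed implementation from PATCH 031 is to compare it against the reference digest that both documentation pages above quote — a minimal sketch, assuming a build with SSL support, since the implementation relies on OpenSSL's `RIPEMD160`; everything in it (`ripeMD160`, `hex`, the expected constant) comes from the patches above:

```sql
-- Compare ripeMD160() against the reference vector quoted in the docs;
-- matches_reference = 1 means the OpenSSL-backed result agrees with it.
SELECT
    hex(ripeMD160('The quick brown fox jumps over the lazy dog')) AS digest,
    digest = '37F332F68DB77BD9D7EDD4969571AD671CF9DD3B' AS matches_reference;
```

ClickHouse allows a later expression in the same SELECT list to reuse the `digest` alias, so the comparison needs no subquery.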
From 6e5465ae5126f3281d81172e952b6811f8946f2d Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Wed, 21 Aug 2024 15:47:08 +0200
Subject: [PATCH 035/114] CI: SQLLogic job fix

---
 docker/test/sqllogic/Dockerfile       |  3 ---
 tests/ci/sqllogic_test.py             | 10 ++++------
 .../docker_scripts/sqllogic_runner.sh | 18 +++++++++---------
 3 files changed, 13 insertions(+), 18 deletions(-)
 rename docker/test/sqllogic/run.sh => tests/docker_scripts/sqllogic_runner.sh (87%)

diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile
index 6397526388e..0d21a2da44e 100644
--- a/docker/test/sqllogic/Dockerfile
+++ b/docker/test/sqllogic/Dockerfile
@@ -40,6 +40,3 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
 
 RUN git clone --recursive ${sqllogic_test_repo}
-
-COPY run.sh /
-CMD ["/bin/bash", "/run.sh"]
diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py
index 63880f07e92..7fe44c235c7 100755
--- a/tests/ci/sqllogic_test.py
+++ b/tests/ci/sqllogic_test.py
@@ -31,7 +31,7 @@ IMAGE_NAME = "clickhouse/sqllogic-test"
 
 def get_run_command(
     builds_path: Path,
-    repo_tests_path: Path,
+    repo_path: Path,
     result_path: Path,
     server_log_path: Path,
     image: DockerImage,
@@ -39,11 +39,11 @@ def get_run_command(
     return (
         f"docker run "
         f"--volume={builds_path}:/package_folder "
-        f"--volume={repo_tests_path}:/clickhouse-tests "
+        f"--volume={repo_path}:/repo "
        f"--volume={result_path}:/test_output "
        f"--volume={server_log_path}:/var/log/clickhouse-server "
        "--security-opt seccomp=unconfined "  # required to issue io_uring sys-calls
-        f"--cap-add=SYS_PTRACE {image}"
+        f"--cap-add=SYS_PTRACE {image} /repo/tests/docker_scripts/sqllogic_runner.sh"
    )

@@ -94,8 +94,6 @@ def main():
 
     docker_image = pull_image(get_docker_image(IMAGE_NAME))
 
-    repo_tests_path = repo_path / "tests"
-
     packages_path = temp_path / "packages"
     packages_path.mkdir(parents=True, exist_ok=True)
 
@@ -111,7 +109,7 @@ def main():
     run_command = get_run_command(  # run script inside docker
         packages_path,
-        repo_tests_path,
+        repo_path,
         result_path,
         server_log_path,
         docker_image,
diff --git a/docker/test/sqllogic/run.sh b/tests/docker_scripts/sqllogic_runner.sh
similarity index 87%
rename from docker/test/sqllogic/run.sh
rename to tests/docker_scripts/sqllogic_runner.sh
index 32368980f9b..8b8f1e7aec7 100755
--- a/docker/test/sqllogic/run.sh
+++ b/tests/docker_scripts/sqllogic_runner.sh
@@ -15,10 +15,10 @@ echo "Files in current directory"
 ls -la ./
 echo "Files in root directory"
 ls -la /
-echo "Files in /clickhouse-tests directory"
-ls -la /clickhouse-tests
-echo "Files in /clickhouse-tests/sqllogic directory"
-ls -la /clickhouse-tests/sqllogic
+echo "Files in /repo/tests directory"
+ls -la /repo/tests
+echo "Files in /repo/tests/sqllogic directory"
+ls -la /repo/tests/sqllogic
 echo "Files in /package_folder directory"
 ls -la /package_folder
 echo "Files in /test_output"
@@ -45,13 +45,13 @@
function run_tests() if [ -d /sqllogictest ] then mkdir -p /test_output/statements-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-statements-test.log \ --log-level info \ statements-test \ @@ -77,7 +77,7 @@ function run_tests() tar -zcvf statements-check.tar.gz statements-test 1>/dev/null mkdir -p /test_output/complete-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-complete-test.log \ --log-level info \ complete-test \ From ca880ccdee16a212cebccce7090eabf4f528aa68 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 20:47:48 +0000 Subject: [PATCH 036/114] Fix structure comparison between 2 JSON columns --- src/Columns/ColumnObject.cpp | 4 ++-- .../03229_json_structure_comparison.reference | 3 +++ .../03229_json_structure_comparison.sql | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03229_json_structure_comparison.reference create mode 100644 tests/queries/0_stateless/03229_json_structure_comparison.sql diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index b7194ef50e7..999c0f6088e 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -127,7 +127,7 @@ std::string ColumnObject::getName() const { WriteBufferFromOwnString ss; ss << "Object("; - ss << "max_dynamic_paths=" << max_dynamic_paths; + ss << "max_dynamic_paths=" << global_max_dynamic_paths; ss << ", max_dynamic_types=" << max_dynamic_types; std::vector sorted_typed_paths; sorted_typed_paths.reserve(typed_paths.size()); @@ -1047,7 +1047,7 @@ bool ColumnObject::structureEquals(const IColumn & rhs) const { /// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types. 
const auto * rhs_object = typeid_cast(&rhs); - if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || max_dynamic_paths != rhs_object->max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) + if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) return false; for (const auto & [path, column] : typed_paths) diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.reference b/tests/queries/0_stateless/03229_json_structure_comparison.reference new file mode 100644 index 00000000000..c816df4f5c7 --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.reference @@ -0,0 +1,3 @@ +{"foo1":"bar"} {"foo1":"bar"} +{"foo2":"bar"} {"foo2":"bar"} +{"foo2":"bar"} {"foo2":"bar"} diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.sql b/tests/queries/0_stateless/03229_json_structure_comparison.sql new file mode 100644 index 00000000000..16db469325d --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.sql @@ -0,0 +1,22 @@ +SET allow_experimental_json_type=1; + +DROP TABLE IF EXISTS test_new_json_type; + +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; + +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT + a.data + , b.data +FROM + test_new_json_type a + JOIN test_new_json_type b + ON a.id = b.id; + +DROP TABLE test_new_json_type; + From bff252ea73f5141b1c85bccb69780f7e27c9a6f7 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 21:45:26 +0000 Subject: [PATCH 037/114] Fix test --- .../0_stateless/03228_variant_permutation_issue.reference | 2 +- tests/queries/0_stateless/03228_variant_permutation_issue.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference index 7b18a0c59fb..10688253e15 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -1,4 +1,4 @@ 2 {"foo2":"bar"} 1 3 {"foo2":"bar"} 1 -3 {"foo2":"bar"} 1 2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql index 3f60d42ffbd..088361d6430 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -8,11 +8,11 @@ INSERT INTO test_new_json_type format JSONEachRow {"id":3,"data":{"foo2":"bar"},"version":1} ; -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; DROP TABLE test_new_json_type; From 2f6ad1271cfbd9aa62ad2365e70314aba4da21b9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 
00:27:02 +0200 Subject: [PATCH 038/114] fix tests + exception --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 10 +-- tests/integration/test_storage_hdfs/test.py | 9 +-- .../03203_hive_style_partitioning.reference | 2 - .../03203_hive_style_partitioning.sh | 61 +++---------------- 5 files changed, 15 insertions(+), 69 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ca82a1ce67a..f0d276e4e56 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 637dbd38262..a3172329a99 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( + "Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path ) ] @@ -1560,7 +1560,7 @@ def test_hive_partitioning_with_all_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(Exception, match=pattern): azure_query(node, query, settings={"use_hive_partitioning": 1}) @@ -1572,7 +1572,7 @@ def test_hive_partitioning_without_setting(cluster): table_format = "column1 String, column2 String" values_1 = f"('Elizabeth', 'Gordon')" values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv" azure_query( node, @@ -1582,7 +1582,7 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ad2e7084791..ea8c4efa745 100644 --- 
a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1268,11 +1268,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): ) assert r == f"Elizabeth\n" - r = node1.query( - "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" def test_hive_partitioning_with_all_parameters(started_cluster): @@ -1285,11 +1280,11 @@ def test_hive_partitioning_with_all_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 1}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index b5eaef7f51e..af52dcd9b88 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -35,8 +35,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 41b215578f0..4e165446c34 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -11,22 +11,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM 
file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -37,7 +25,6 @@ SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01 $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ @@ -61,21 +48,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" @@ -92,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM 
s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; """ $CLICKHOUSE_CLIENT -n -q """ @@ -123,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; """ From 8a89cd31a1e7770479af6eaf1b4211ef4ece1795 Mon 
Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:29:32 +0200 Subject: [PATCH 039/114] Fix Upgrade Check: move some settings to 24.9 section --- src/Core/SettingsChangesHistory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index fb59577b0f0..5e831c6301c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -72,11 +72,13 @@ static std::initializer_list Date: Thu, 22 Aug 2024 00:48:29 +0200 Subject: [PATCH 040/114] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ea8c4efa745..a75c13b9ea6 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1269,7 +1269,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 0f3c7ae8c202f475fe55f33f45e9bca92155d52c Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 22 Aug 2024 01:15:16 +0200 Subject: [PATCH 041/114] feat: add docs --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ffd9fae7f45..308e285c4bd 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2942 +personal_ws-1.1 en 2983 AArch ACLs ALTERs @@ -957,6 +957,7 @@ ThreadPoolRemoteFSReaderThreads ThreadPoolRemoteFSReaderThreadsActive ThreadsActive ThreadsInOvercommitTracker +TimeSeries Timeunit TinyLog Tkachenko @@ -1098,12 +1099,12 @@ addressToLineWithInlines addressToSymbol adviced agg +aggThrow aggregatefunction aggregatingmergetree aggregatio aggretate aggthrow -aggThrow aiochclient allocator alphaTokens @@ -1875,8 +1876,8 @@ joinGet joinGetOrNull json jsonMergePatch -jsonasstring jsonasobject +jsonasstring jsoncolumns jsoncolumnsmonoblock jsoncompact @@ -1917,8 +1918,8 @@ kurtSamp kurtosis kurtpop kurtsamp -laion lagInFrame +laion lang laravel largestTriangleThreeBuckets @@ -2020,7 +2021,6 @@ maxMap maxintersections maxintersectionsposition maxmap -minMappedArrays maxmind mdadm meanZTest @@ -2213,8 +2213,8 @@ parseReadableSizeOrZero parseTimeDelta parseable parsers -partitionId partitionID +partitionId pathFull pclmulqdq pcre @@ -2443,6 +2443,7 @@ rewritable rightPad rightPadUTF rightUTF +ripeMD risc riscv ro @@ -2694,7 +2695,6 @@ themself threadpool throwIf timeDiff -TimeSeries timeSeriesData timeSeriesMetrics timeSeriesTags From 54caf1f84e3c3b5076adf29b49f4ee548f243091 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 22 Aug 2024 01:20:46 +0200 Subject: [PATCH 042/114] fix: wrap in conditional preprocessor directives --- src/Functions/FunctionsHashing.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 5111ee2bd90..ec39cf1e2cf 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -192,6 +192,7 @@ T combineHashesFunc(T t1, T t2) return 
HashFunction::apply(reinterpret_cast(hashes), sizeof(hashes)); } +#if USE_SSL struct RipeMD160Impl { static constexpr auto name = "ripeMD160"; @@ -218,7 +219,7 @@ struct RipeMD160Impl static constexpr bool use_int_hash_for_pods = false; }; - +#endif struct SipHash64Impl { @@ -1647,6 +1648,7 @@ using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL using FunctionHalfMD5 = FunctionAnyHash; +using FunctionRipeMD160Hash = FunctionAnyHash; #endif using FunctionSipHash128 = FunctionAnyHash; using FunctionSipHash128Keyed = FunctionAnyHash; @@ -1676,7 +1678,6 @@ using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; -using FunctionRipeMD160Hash = FunctionAnyHash; } #pragma clang diagnostic pop From be4439e3ec0a1491f4e333ac848844fd930a6e5b Mon Sep 17 00:00:00 2001 From: Alexey Date: Thu, 22 Aug 2024 10:30:48 +0300 Subject: [PATCH 043/114] Update install.md Added correct commands for the Russian version of the installation from deb packages --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index aee445da843..4a0ec258c64 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -25,10 +25,10 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update From 6466f374e0372b22a23d1193e534bd6c94f87b94 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:29:33 +0200 Subject: [PATCH 044/114] Update geohash.md --- .../en/sql-reference/functions/geo/geohash.md | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index b6ac7a74092..c4f41fc53da 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -6,7 +6,7 @@ title: "Functions for Working with Geohash" ## Geohash -[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location. +[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be. 
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). @@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c Encodes latitude and longitude as a [geohash](#geohash)-string. +**Syntax** + ``` sql geohashEncode(longitude, latitude, [precision]) ``` **Input values** -- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` -- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` -- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. +- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data_types/float.md). +- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data_types/float.md). +- `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md). + +:::note +- All coordinate parameters must be of the same type: either `Float32` or `Float64`. +- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`. +::: **Returned values** -- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). +- Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md). **Example** +Query: + ``` sql SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; ``` +Result: + ``` text ┌─res──────────┐ │ ezs42d000000 │ @@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; Decodes any [geohash](#geohash)-encoded string into longitude and latitude. +**Syntax** + +```sql +geohashDecode(hash_str) +``` + **Input values** -- encoded string - geohash-encoded string. +- `hash_str` — Geohash-encoded string. **Returned values** -- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. +- Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. 
[Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md)) **Example** From 95f45d2eaf39a9e8a6373c75749ec57f727be700 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:20:04 +0200 Subject: [PATCH 045/114] try to fix tests --- .../test_storage_azure_blob_storage/test.py | 14 +++++------ tests/integration/test_storage_hdfs/test.py | 25 +++---------------- .../03203_hive_style_partitioning.reference | 20 +++++++-------- .../03203_hive_style_partitioning.sh | 14 +++-------- 4 files changed, 23 insertions(+), 50 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index a3172329a99..c1f518e45ce 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1513,14 +1513,14 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}')" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1533,7 +1533,7 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1551,14 +1551,14 @@ def test_hive_partitioning_with_all_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = r"DB::Exception: Cannot use hive partitioning for file" @@ -1577,14 +1577,14 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', 
blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index a75c13b9ea6..31cc8609eb4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,35 +1259,16 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" r = node1.query( - "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" -def test_hive_partitioning_with_all_parameters(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - - pattern = r"DB::Exception: Cannot use hive partitioning for file" - - with pytest.raises(QueryRuntimeException, match=pattern): - node1.query( - f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index af52dcd9b88..acdadc2510b 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,5 +1,5 @@ TESTING THE FILE HIVE PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -19,8 +19,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -35,12 +34,13 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 +4081 +2070 2070 b 1 -1 TESTING THE URL PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -60,10 +60,9 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross 
Elizabeth -Elizabeth Gordon Elizabeth 1 TESTING THE S3 PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -83,8 +82,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -96,7 +94,7 @@ Delgado Elizabeth Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -106,7 +104,7 @@ Gibson Elizabeth Greer Elizabeth Delgado Elizabeth Cross Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 4e165446c34..b3d196924af 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -14,7 +14,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -29,16 +29,10 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ -$CLICKHOUSE_LOCAL -n -q """ -set use_hive_partitioning = 1; - -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; -""" 2>&1 | grep -c "INCORRECT_DATA" - $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -68,7 +62,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ $CLICKHOUSE_CLIENT -n -q """ @@ -84,5 +78,5 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 
FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ From 62054cae666244fd072a56f70a6df73e68249cb0 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:49:16 +0200 Subject: [PATCH 046/114] Update geohash.md --- docs/en/sql-reference/functions/geo/geohash.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index c4f41fc53da..ce2e3c43b3e 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -22,8 +22,8 @@ geohashEncode(longitude, latitude, [precision]) **Input values** -- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data_types/float.md). -- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data_types/float.md). +- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md). - `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md). :::note From 84467077b886cd48c9cd33c69c1935b3f7863dd7 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 22 Aug 2024 13:45:13 +0200 Subject: [PATCH 047/114] Fix test for role expiration in RoleCache. --- tests/integration/test_role/test.py | 81 +++++++++-------------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..9d15f0f81db 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -418,72 +418,43 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache 
can be caused by another role being used. + instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") - - -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") - - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) - - instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation - - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") - - assert instance.query("SELECT * from tre1", user="ure") == "0\n" - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") From 664e9b3db9d47e45c642ad21e3a5273ab423199a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:30:41 +0200 Subject: [PATCH 048/114] Add one more test. --- tests/integration/test_role/test.py | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9d15f0f81db..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -458,3 +459,175 @@ def test_role_expiration(with_extra_role): if with_extra_role: instance.query("DROP ROLE extra_role") instance.query("DROP USER extra_user") + + +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 + + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. 
+ num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) + instance.query( + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) + + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} + + # Checks that each of the users can access the expected set of columns and can't access other columns. + def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok + + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. + def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. 
+ def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. + def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") From 7ef5c366e873c4fd99f257eefbb3a350848e308c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:33:50 +0200 Subject: [PATCH 049/114] Fix expiration in RoleCache. 
--- src/Access/RoleCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 2d94df2eea5..cc1f1520b67 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO SubscriptionsOnRoles new_subscriptions_on_roles; new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); - auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; + auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); }; for (const auto & current_role : enabled_roles.params.current_roles) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); From 54dd3afd49df9c92cd3621a5cec4c7464c341a71 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 14:52:17 +0200 Subject: [PATCH 050/114] Turn off fault injection for insert in 01396_inactive_replica_cleanup_nodes_zookeeper --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..9ea15071856 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -23,11 +23,10 @@ $CLICKHOUSE_CLIENT -n --query " DETACH TABLE r2; " -$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# insert_keeper_fault_injection_probability=0 -- can slowdown insert a lot (produce a lot of parts) +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" -# Now wait for cleanup thread - for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From 7a740819b9551a291827b9d37b8b724612587a20 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 14:53:15 +0200 Subject: [PATCH 051/114] Accidentally deleted comment --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 9ea15071856..80e9253af2c 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -27,6 +27,7 @@ $CLICKHOUSE_CLIENT -n --query " $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query 
"INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# Now wait for cleanup thread for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From b3f084459f60b1e31c32736573af0810dee99230 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:53:53 +0200 Subject: [PATCH 052/114] fix black --- tests/integration/test_storage_hdfs/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 31cc8609eb4..b18940b7290 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,8 +1259,13 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" + hdfs_api.write_data( + f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/file_1") + == f"column0,column1\nElizabeth,Gordon\n" + ) r = node1.query( "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", @@ -1269,6 +1274,7 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" + def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 8d14d8523098a42cd778ef50a9b066508da8919c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:24:33 +0200 Subject: [PATCH 053/114] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index b18940b7290..7a92e8adb0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1274,7 +1274,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From add4718634317304f652579a9f201c3b81c96a7d Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 06:37:27 -0700 Subject: [PATCH 054/114] Update README.md - Meetups update Fixed one meetup location; Added more meetups --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e66b9da73e..c9474ef0fc0 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,17 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [Raleigh Meetup (Deutsche 
Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 -* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 +Other upcoming meetups +* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 +* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 +* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 +* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 +* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 +* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 + ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! From 91e65feaaedd4806875aed3d4be4f07edeefdb71 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 22 Aug 2024 13:42:30 +0000 Subject: [PATCH 055/114] fix virtual columns in Merge engine --- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageMerge.cpp | 14 +++++++------- .../02890_describe_table_options.reference | 8 ++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c4668159759..0b80858800b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,7 +290,7 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); - /// Add virtual columns from table of storage Merges. + /// Add virtual columns from table with Merge engine. 
desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e88844e2d31..0827321e296 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. 
Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 From ce33943b430a9ad512f4942083889dea4decb778 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:50:59 +0200 Subject: [PATCH 056/114] Fix flaky check --- tests/docker_scripts/stateless_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker_scripts/stateless_runner.sh b/tests/docker_scripts/stateless_runner.sh index 40a63f74a6b..d8921a04458 100755 --- a/tests/docker_scripts/stateless_runner.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -339,7 +339,7 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
-    NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
+    NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \
         | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
 fi

From a9e793532ae308767da3bc4da74d9631cd85eb90 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 22 Aug 2024 16:34:14 +0200
Subject: [PATCH 057/114] fix shutdown for PeriodicLog

---
 src/Interpreters/PeriodicLog.cpp | 3 ++-
 src/Interpreters/PeriodicLog.h   | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp
index 9d2891e11eb..15970ca5b81 100644
--- a/src/Interpreters/PeriodicLog.cpp
+++ b/src/Interpreters/PeriodicLog.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include "Functions/DateTimeTransforms.h"

 namespace DB
 {
@@ -27,7 +28,7 @@ template <typename LogElement>
 void PeriodicLog<LogElement>::shutdown()
 {
     stopCollect();
-    this->stopFlushThread();
+    Base::shutdown();
 }

 template <typename LogElement>
diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h
index 08c3f7eb23f..ceac8088d40 100644
--- a/src/Interpreters/PeriodicLog.h
+++ b/src/Interpreters/PeriodicLog.h
@@ -17,6 +17,7 @@ template <typename LogElement>
 class PeriodicLog : public SystemLog<LogElement>
 {
     using SystemLog<LogElement>::SystemLog;
+    using Base = SystemLog<LogElement>;

 public:
     using TimePoint = std::chrono::system_clock::time_point;
@@ -24,12 +25,12 @@ public:
     /// Launches a background thread to collect metrics with interval
     void startCollect(size_t collect_interval_milliseconds_);

-    /// Stop background thread
-    void stopCollect();
-
     void shutdown() final;

 protected:
+    /// Stop background thread
+    void stopCollect();
+
     virtual void stepFunction(TimePoint current_time) = 0;

 private:

From 5340ac5fbc7fba75d6a743d345c0f79dc466df0b Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 14:39:19 +0000
Subject: [PATCH 058/114] Update version_date.tsv and changelogs after v24.5.5.41-stable

---
 docs/changelogs/v24.5.5.41-stable.md | 71 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +
 2 files changed, 73 insertions(+)
 create mode 100644 docs/changelogs/v24.5.5.41-stable.md

diff --git a/docs/changelogs/v24.5.5.41-stable.md b/docs/changelogs/v24.5.5.41-stable.md
new file mode 100644
index 00000000000..8ba160e31d7
--- /dev/null
+++ b/docs/changelogs/v24.5.5.41-stable.md
@@ -0,0 +1,71 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0)
+
+#### Improvement
+* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. 
Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
+* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
+* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents incorrect results of schema inference when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. 
[#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
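
A minimal sketch of the query shape targeted by the `(column IS NULL)` fix above; the table `t` and its Nullable column `a` are hypothetical, not from the patch:

```sql
-- Before the fix, combining an aggregate with the expression (a IS NULL)
-- could fail with "Unknown identifier" or "Column is not under aggregate
-- function" when the new analyzer was disabled.
SELECT count(), (a IS NULL) AS a_is_null
FROM t
GROUP BY a_is_null;
```
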
+* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
+* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. 
[#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). 
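
A hedged illustration of the `ALTER MODIFY SQL SECURITY` entry above; the table and view names are hypothetical:

```sql
-- SQL SECURITY is a property of (materialized) views; applying it to a plain
-- table is now rejected with an explicit error instead of being accepted.
ALTER TABLE plain_merge_tree_table MODIFY SQL SECURITY INVOKER;  -- now an explicit error
ALTER TABLE my_materialized_view MODIFY SQL SECURITY DEFINER DEFINER = CURRENT_USER;  -- still valid for views
```
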
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..9063d3ef971 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +15,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 1ea0163dfe6b3278d8a5e8d86c31b3d63d7a3780 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:42:14 +0200 Subject: [PATCH 059/114] Fix issue with maps with arrays as keys --- src/Functions/FunctionsHashing.h | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 7 ++++++- tests/queries/0_stateless/02534_keyed_siphash.sql | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0cf4246fd66..3da0b2cd9be 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -93,9 +93,9 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - if (offsets != nullptr) + if (offsets != nullptr && i > 0) { - const auto * const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 31c0cae8981..8b147025a05 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -244,5 +244,10 @@ Test emtpy arrays and maps 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +Test maps with arrays as keys 16734549324845627102 -D675BB3D687973A238AB891DD99C7047 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b499d8ef02b..ba3c4a9156d 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -351,5 +351,8 @@ SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); -SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); From a93d1919804d1c8dc7760f20084ade9a09710a47 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:43:38 +0200 Subject: [PATCH 060/114] Fix typo 
in test case

---
 tests/queries/0_stateless/02534_keyed_siphash.reference | 2 +-
 tests/queries/0_stateless/02534_keyed_siphash.sql       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference
index 8b147025a05..a05446a494e 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.reference
+++ b/tests/queries/0_stateless/02534_keyed_siphash.reference
@@ -239,7 +239,7 @@ Check bug found fuzzing
 Test arrays and maps
 608E1FF030C9E206185B112C2A25F1A7
 ABB65AE97711A2E053E324ED88B1D08B
-Test emtpy arrays and maps
+Test empty arrays and maps
 4761183170873013810
 0AD04BFD000000000000000000000000
 4761183170873013810
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql
index ba3c4a9156d..7cfc82512bd 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.sql
+++ b/tests/queries/0_stateless/02534_keyed_siphash.sql
@@ -346,7 +346,7 @@ INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g'
 SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a;
 DROP TABLE sipHashKeyed_keys;

-SELECT 'Test emtpy arrays and maps';
+SELECT 'Test empty arrays and maps';
 SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []);
 SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []));
 SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []));

From 0dc18247df3a290b4fb312325ff3b2a44a3f8357 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 15:10:24 +0000
Subject: [PATCH 061/114] Update version_date.tsv and changelogs after v24.6.3.38-stable

---
 docs/changelogs/v24.6.3.38-stable.md | 83 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  3 +
 2 files changed, 86 insertions(+)
 create mode 100644 docs/changelogs/v24.6.3.38-stable.md

diff --git a/docs/changelogs/v24.6.3.38-stable.md b/docs/changelogs/v24.6.3.38-stable.md
new file mode 100644
index 00000000000..01d7e26e31f
--- /dev/null
+++ b/docs/changelogs/v24.6.3.38-stable.md
@@ -0,0 +1,83 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c)
+
+#### Improvement
+* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). 
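
A sketch of the null-safe join rewrite described in the entry above, assuming two hypothetical tables `t1` and `t2` with a `Nullable` column `x`:

```sql
-- The verbose null-safe pattern ...
SELECT * FROM t1 JOIN t2
    ON (t1.x = t2.x AND t1.x IS NOT NULL AND t2.x IS NOT NULL)
    OR (t1.x IS NULL AND t2.x IS NULL);

-- ... is equivalent to the null-safe comparison the optimizer rewrites it to:
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x;
```
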
+* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents incorrect results of schema inference when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
+* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
+* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. 
[#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
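
A minimal sketch of the local named-collection lifecycle fixed in the entry above; the collection name and keys are hypothetical:

```sql
-- With collections stored locally (not in ZooKeeper/Keeper), DROP could
-- previously fail to remove the collection; after the fix it is removed.
CREATE NAMED COLLECTION creds AS user = 'reader', password = 'secret';
DROP NAMED COLLECTION creds;
```
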
+* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
+* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
+* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
[#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
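
A hypothetical mutation of the shape covered by the untouched-parts entry above (the table `events` and column `d` are not from the patch):

```sql
-- Parts whose data cannot match the predicate should be skipped rather than
-- rewritten; previously, with the new analyzer enabled, they were rewritten anyway.
ALTER TABLE events UPDATE value = 0 WHERE d = '2024-08-01';
```
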
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. 
[#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..cc168f58862 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -3,9 +3,11 @@ v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +16,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 0b9c24f31d548c87deca3334282c14fc78a295ba Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 15 Aug 2024 12:09:50 +0000 Subject: [PATCH 062/114] write metadata to disk and keeper in the same format --- src/Storages/ColumnsDescription.cpp | 30 +++++--- src/Storages/ColumnsDescription.h | 6 +- .../__init__.py | 0 .../config/enable_keeper.xml | 26 +++++++ .../config/users.xml | 8 +++ .../test.py | 71 +++++++++++++++++++ 6 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 0d724245b49..0212bbd6fff 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && ast_to_str(ttl) == 
ast_to_str(other.ttl); } -void ColumnDescription::writeText(WriteBuffer & buf) const +String formatASTStateAware(IAST & ast, IAST::FormatState & state) +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true, false); + ast.formatImpl(settings, state, IAST::FormatStateStacked()); + return buf.str(); +} + +void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const { /// NOTE: Serialization format is insane. @@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeChar('\t', buf); DB::writeText(DB::toString(default_desc.kind), buf); writeChar('\t', buf); - writeEscapedString(queryToString(default_desc.expression), buf); + writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf); } - if (!comment.empty()) + if (!comment.empty() && include_comment) { writeChar('\t', buf); DB::writeText("COMMENT ", buf); - writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf); + auto ast = ASTLiteral(Field(comment)); + writeEscapedString(formatASTStateAware(ast, state), buf); } if (codec) { writeChar('\t', buf); - writeEscapedString(queryToString(codec), buf); + writeEscapedString(formatASTStateAware(*codec, state), buf); } if (!settings.empty()) @@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const ASTSetQuery ast; ast.is_standalone = false; ast.changes = settings; - writeEscapedString(queryToString(ast), buf); + writeEscapedString(formatASTStateAware(ast, state), buf); DB::writeText(")", buf); } if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(statistics.getAST()), buf); + writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf); } if (ttl) { writeChar('\t', buf); DB::writeText("TTL ", buf); - writeEscapedString(queryToString(ttl), buf); + writeEscapedString(formatASTStateAware(*ttl, state), buf); } writeChar('\n', buf); @@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs() } -String ColumnsDescription::toString() const +String ColumnsDescription::toString(bool include_comments) const { WriteBufferFromOwnString buf; + IAST::FormatState ast_format_state; writeCString("columns format version: 1\n", buf); DB::writeText(columns.size(), buf); writeCString(" columns:\n", buf); for (const ColumnDescription & column : columns) - column.writeText(buf); + column.writeText(buf, ast_format_state, include_comments); return buf.str(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f0760160f0a..c89c26501e8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,7 +104,7 @@ struct ColumnDescription bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } - void writeText(WriteBuffer & buf) const; + void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const; void readText(ReadBuffer & buf); }; @@ -137,7 +137,7 @@ public: /// NOTE Must correspond with Nested::flatten function. void flattenNested(); /// TODO: remove, insert already flattened Nested columns. 
-    bool operator==(const ColumnsDescription & other) const { return columns == other.columns; }
+    bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); }
     bool operator!=(const ColumnsDescription & other) const { return !(*this == other); }

     auto begin() const { return columns.begin(); }
@@ -221,7 +221,7 @@ public:
     /// Does column has non default specified compression codec
     bool hasCompressionCodec(const String & column_name) const;

-    String toString() const;
+    String toString(bool include_comments = true) const;
     static ColumnsDescription parse(const String & str);

     size_t size() const
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml
new file mode 100644
index 00000000000..4ca4f604ec3
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml
@@ -0,0 +1,26 @@
+<clickhouse>
+    <keeper_server>
+        <tcp_port>2181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <coordination_settings>
+            <session_timeout_ms>20000</session_timeout_ms>
+        </coordination_settings>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>localhost</hostname>
+                <port>9444</port>
+            </server>
+        </raft_configuration>
+    </keeper_server>
+
+    <zookeeper>
+        <node>
+            <host>localhost</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>20000</session_timeout_ms>
+    </zookeeper>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml
new file mode 100644
index 00000000000..c5de0b6819c
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <users>
+        <default>
+            <profile>default</profile>
+        </default>
+    </users>
+
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py
new file mode 100644
index 00000000000..e0c15e18c23
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py
@@ -0,0 +1,71 @@
+import pytest
+import random
+import string
+
+from helpers.cluster import ClickHouseCluster
+
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance(
+    "node",
+    main_configs=[
+        "config/enable_keeper.xml",
+        "config/users.xml",
+    ],
+    stay_alive=True,
+    with_minio=True,
+    macros={"shard": 1, "replica": 1},
+)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def randomize_table_name(table_name, random_suffix_length=10):
+    letters = string.ascii_letters + string.digits
+    return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}"
+
+
+@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"])
+def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine):
+    """
+    Making sure that using aliases in columns' default expressions does not lead to having different columns metadata in ZooKeeper and on disk. 
+ Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150 + """ + + data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}' + + table_name = randomize_table_name("t") + + node.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} + ( + `data` String, + `col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'), + `col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key') + ) + ENGINE = {engine}('/test/{table_name}', '{{replica}}') + ORDER BY col1 + """ + ) + + node.restart_clickhouse() + + node.query( + f""" + INSERT INTO {table_name} (data) VALUES ('{data}'); + """ + ) + assert node.query(f"SELECT data FROM {table_name}").strip() == data + assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val" + assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val" + + node.query(f"DROP TABLE {table_name}") From 859d2bfe273f571458be6f007761bc8c743d589a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 17:18:06 +0200 Subject: [PATCH 063/114] move stopFlushThread to SystemLogBase --- src/Common/SystemLogBase.cpp | 19 +++++++++++++++++++ src/Common/SystemLogBase.h | 2 ++ src/Interpreters/PeriodicLog.cpp | 6 +++--- src/Interpreters/PeriodicLog.h | 2 +- src/Interpreters/SystemLog.cpp | 21 +-------------------- src/Interpreters/SystemLog.h | 7 +------ 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 127c8862a35..45f4eb1c5a6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -273,6 +273,25 @@ void SystemLogBase::startup() saving_thread = std::make_unique([this] { savingThreadFunction(); }); } +template +void SystemLogBase::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + template void SystemLogBase::add(LogElement element) { diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 0d7b04d5c57..0942e920a42 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -216,6 +216,8 @@ public: static consteval bool shouldTurnOffLogger() { return false; } protected: + void stopFlushThread() final; + std::shared_ptr> queue; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 15970ca5b81..1b285aad3ff 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -11,7 +11,7 @@ void PeriodicLog::startCollect(size_t collect_interval_milliseconds_ { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - flush_thread = std::make_unique([this] { threadFunction(); }); + collecting_thread = std::make_unique([this] { threadFunction(); }); } template @@ -20,8 +20,8 @@ void PeriodicLog::stopCollect() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - if (flush_thread) - flush_thread->join(); + if (collecting_thread) + collecting_thread->join(); } template diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h index ceac8088d40..8254a02434a 100644 --- a/src/Interpreters/PeriodicLog.h +++ b/src/Interpreters/PeriodicLog.h @@ -36,7 +36,7 @@ protected: private: void threadFunction(); - std::unique_ptr flush_thread; + std::unique_ptr 
From 837f2bba8a136170b6aa8800b6b30849a9310e5f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 17:23:45 +0200
Subject: [PATCH 064/114] init

---
 .../0_stateless/00080_show_tables_and_system_tables.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
index a58f9ddb0ac..02e3645ece0 100644
--- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
+++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
@@ -6,8 +6,8 @@ CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier};
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = TinyLog;
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.B (A UInt8) ENGINE = TinyLog;
 
-SHOW TABLES from {CLICKHOUSE_DATABASE:Identifier};
-SHOW TABLES in system where engine like '%System%' and name in ('numbers', 'one');
+SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier};
+SHOW TABLES IN system WHERE engine LIKE '%System%' AND name IN ('numbers', 'one') AND database = 'system';
 
 SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = currentDatabase() ORDER BY name FORMAT TSVRaw;
 
@@ -16,7 +16,7 @@ SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_log(id UInt64) ENGINE = Log;
 CREATE MATERIALIZED VIEW {CLICKHOUSE_DATABASE:Identifier}.test_materialized ENGINE = Log AS SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_log;
 
-SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' and database=currentDatabase();
+SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' AND database=currentDatabase();
 
 DROP DATABASE
{CLICKHOUSE_DATABASE:Identifier}; From 51fbc629c6dff4653e687228b0507947516072bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:42:17 +0000 Subject: [PATCH 065/114] Update version_date.tsv and changelogs after v24.7.3.47-stable --- docs/changelogs/v24.7.3.47-stable.md | 55 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++ 2 files changed, 59 insertions(+) create mode 100644 docs/changelogs/v24.7.3.47-stable.md diff --git a/docs/changelogs/v24.7.3.47-stable.md b/docs/changelogs/v24.7.3.47-stable.md new file mode 100644 index 00000000000..e5f23a70fe1 --- /dev/null +++ b/docs/changelogs/v24.7.3.47-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). 
+* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. 
[#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..6ef5ace4ba6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,14 @@ v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +17,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:46:47 +0200 Subject: [PATCH 066/114] small fixes --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 4a0ec258c64..5bce41ec07a 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -22,7 +22,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su ### Из deb-пакетов {#install-from-deb-packages} -Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: +Рекомендуется использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash sudo apt-get install -y apt-transport-https ca-certificates curl gnupg @@ -55,7 +55,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ::: ### Из rpm-пакетов {#from-rpm-packages} -Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. +Команда ClickHouse рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. #### Установка официального репозитория @@ -102,7 +102,7 @@ sudo yum install clickhouse-server clickhouse-client ### Из tgz-архивов {#from-tgz-archives} -Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. +Команда ClickHouse рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. 
Пример установки самой свежей версии: From 980b02bfd67defbbdf78165e8225fb754d722d7a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:48:57 +0200 Subject: [PATCH 067/114] fix compatibility with en version --- docs/ru/getting-started/install.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 5bce41ec07a..f8a660fbec9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -31,9 +31,17 @@ curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | s echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update +``` +#### Установка ClickHouse server и client + +```bash sudo apt-get install -y clickhouse-server clickhouse-client +``` +#### Запуск ClickHouse server + +```bash sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` From 7c3a013d56c1dbd5b72f04f6be61f007004aaefa Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 22 Aug 2024 16:53:30 +0100 Subject: [PATCH 068/114] Update newjson.md --- docs/en/sql-reference/data-types/newjson.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 9e43216df6c..f7fc7e1498e 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -70,7 +70,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json └────────────────────────────────────────────────┘ ``` -CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. +CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. 
## Reading JSON paths as subcolumns From 28fbd8a4eff4eafa7db99eb37e38376ffda11763 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:56:16 +0200 Subject: [PATCH 069/114] fix stateless tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 2 -- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index acdadc2510b..a9d856babce 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -34,8 +34,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index b3d196924af..6734c5f14ad 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From 9c0e1df1663dd5c56066dd615fc3cafe6408d308 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:58:15 +0200 Subject: [PATCH 070/114] Fix flaky test 00989_parallel_parts_loading --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..dc074241ff6 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small insert block size can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From 0bd8ebf62616ce882b0ebc46945c837a5a91ba44 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 17:58:56 +0200 Subject: [PATCH 071/114] Update README.md adding community call. resolving recent recordings --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9474ef0fc0..9099fd48659 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. 
-* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 ## Upcoming Events @@ -58,7 +58,7 @@ Other upcoming meetups ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! +* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? From 52cdd88eb6d7bbb5d395dd80445655ad47c83c92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:59:10 +0200 Subject: [PATCH 072/114] Better comment --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index dc074241ff6..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,5 @@ -- Tags: no-random-settings, no-random-merge-tree-settings --- small insert block size can make insert terribly slow, especially with some build like msan +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From e7b89537bf1bb760c6082f04de4668bd1c00f33a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 18:02:42 +0200 Subject: [PATCH 073/114] fix style --- src/Interpreters/PeriodicLog.cpp | 1 - src/Interpreters/SystemLog.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 1b285aad3ff..22bc14856c4 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Functions/DateTimeTransforms.h" namespace DB { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 31652c1af67..c03f9370068 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -134,7 +134,7 @@ public: protected: LoggerPtr log; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 1692360233593e635c5a7797847bdfd8a0ffa33e Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 18:12:38 +0200 Subject: [PATCH 074/114] Update README.md 26 and 266 are different --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9099fd48659..83a5c05c667 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to 
discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26 ## Upcoming Events From 4264fbc037accedecebcd8122910e4406e92cd58 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 16:16:47 +0000 Subject: [PATCH 075/114] Update version_date.tsv and changelogs after v24.8.2.3-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.8.2.3-lts.md | 12 ++++++++++++ utils/list-versions/version_date.tsv | 5 +++++ 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.8.2.3-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index fc93cee5bbc..6ff7ea43374 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3ceaf2a08b4..c87885d3b49 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 76db997821c..6ccf74823e2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.8.2.3-lts.md b/docs/changelogs/v24.8.2.3-lts.md new file mode 100644 index 00000000000..69dfc9961a2 --- /dev/null +++ b/docs/changelogs/v24.8.2.3-lts.md @@ -0,0 +1,12 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). 
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 8556375d543..199c4f822f4 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,11 +1,15 @@
+v24.8.2.3-lts 2024-08-22
 v24.8.1.2684-lts 2024-08-21
+v24.7.3.47-stable 2024-08-22
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
 v24.6.3.95-stable 2024-08-06
+v24.6.3.38-stable 2024-08-22
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
 v24.5.5.78-stable 2024-08-05
+v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13
@@ -14,6 +18,7 @@ v24.4.4.113-stable 2024-08-02
 v24.4.3.25-stable 2024-06-14
 v24.4.2.141-stable 2024-06-07
 v24.4.1.2088-stable 2024-05-01
+v24.3.9.5-lts 2024-08-22
 v24.3.8.13-lts 2024-08-20
 v24.3.7.30-lts 2024-08-14
 v24.3.6.48-lts 2024-08-02
From fa453c3664b18da7a6945e662b881f80fedadc5b Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Thu, 22 Aug 2024 18:13:45 +0200
Subject: [PATCH 076/114] Disable SqlLogic job

---
 tests/ci/ci_config.py      | 7 ++++---
 tests/ci/ci_definitions.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 58de25f039f..0885f1d9ec2 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -498,9 +498,10 @@ class CI:
         JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties(
             required_builds=[BuildNames.PACKAGE_DEBUG],
         ),
-        JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
-            required_builds=[BuildNames.PACKAGE_RELEASE],
-        ),
+        # TODO: job does not work at all, uncomment and fix
+        # JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
+        #     required_builds=[BuildNames.PACKAGE_RELEASE],
+        # ),
         JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties(
             required_builds=[BuildNames.PACKAGE_RELEASE],
         ),
diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py
index 1cdb3f1487e..9d95a19790f 100644
--- a/tests/ci/ci_definitions.py
+++ b/tests/ci/ci_definitions.py
@@ -204,7 +204,7 @@ class JobNames(metaclass=WithIter):
     PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)"
     PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)"
 
-    SQL_LOGIC_TEST = "Sqllogic test (release)"
+    # SQL_LOGIC_TEST = "Sqllogic test (release)"
 
     SQLANCER = "SQLancer (release)"
    SQLANCER_DEBUG = "SQLancer (debug)"
From 06c46ee75bcb94fe02ac68df6a4a044145804d76 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 18:56:50 +0200
Subject: [PATCH 077/114] add one more test

---
 .../0_stateless/03203_hive_style_partitioning.reference     | 1 +
 tests/queries/0_stateless/03203_hive_style_partitioning.sh | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference
index a9d856babce..0fbc1fb556e 100644
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference
@@ -37,6 +37,7 @@ Array(Int64) LowCardinality(Float64)
 2070
 b
 1
+1
 TESTING THE URL PARTITIONING
 last Elizabeth
 Frank Elizabeth
diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
index 6734c5f14ad..8ab18f5edfe 100755
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c
 SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
 """
 
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "INCORRECT_DATA"
+
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 0;
 
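Before the next patch, a sketch of the behavior the test added above pins down. With `use_hive_partitioning = 1`, a `key=value` component in the path is exposed as a column, and the same key appearing twice with conflicting values must be rejected. The paths below mirror the test fixture and are illustrative only:

```sql
SET use_hive_partitioning = 1;

-- A consistent partition value: `column0` is readable as a column.
SELECT column0
FROM file('data_hive/partitioning/column0=Elizabeth/sample.parquet')
LIMIT 1;

-- Conflicting values for the same key along one path are an error:
SELECT column0
FROM file('data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet')
LIMIT 1; -- expected to fail with INCORRECT_DATA
```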
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c
 SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
 """
 
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "INCORRECT_DATA"
+
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 0;
 
From 2a32207e9ee44d52d6fbca7313d847b4eef1c4fb Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 01:20:46 +0200
Subject: [PATCH 078/114] fix: wrap in conditional preprocessor directives

---
 src/Functions/FunctionsHashingRipe.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Functions/FunctionsHashingRipe.cpp b/src/Functions/FunctionsHashingRipe.cpp
index 5b06b8ab924..315296b7690 100644
--- a/src/Functions/FunctionsHashingRipe.cpp
+++ b/src/Functions/FunctionsHashingRipe.cpp
@@ -7,6 +7,7 @@
 /// due to excessive resource consumption.
 namespace DB
 {
+#if USE_SSL
 REGISTER_FUNCTION(HashingRipe)
 {
     factory.registerFunction(FunctionDocumentation{
@@ -18,4 +19,5 @@ REGISTER_FUNCTION(HashingRipe)
     )"}}, .categories{"Hash"}});
 }
+#endif
 }
From ef9fbe3006b3023bf47e3a0109490d166071c2aa Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 01:20:46 +0200
Subject: [PATCH 079/114] fix: disable running test in fasttest due to missing OpenSSL

---
 tests/queries/0_stateless/03222_ripeMD160.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/03222_ripeMD160.sql b/tests/queries/0_stateless/03222_ripeMD160.sql
index 592f9f830dd..9d418376a20 100644
--- a/tests/queries/0_stateless/03222_ripeMD160.sql
+++ b/tests/queries/0_stateless/03222_ripeMD160.sql
@@ -1,3 +1,4 @@
+-- Tags: no-fasttest
 -- Ouput can be verified using: https://emn178.github.io/online-tools/ripemd-160/
 
 SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
From a2ff8e4384f04b2e57d3de93a1ba63971f94794e Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Thu, 22 Aug 2024 20:44:52 +0300
Subject: [PATCH 080/114] fix: correct return type inconsistencies in docs

---
 docs/en/sql-reference/functions/hash-functions.md | 5 ++---
 docs/ru/sql-reference/functions/hash-functions.md | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 9b7ac8af0e3..cd1c85b5f4c 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -689,9 +689,8 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
 ```
 
 ## ripeMD160
-Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md).
-
+Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.
 
 **Syntax**
 
@@ -705,7 +704,7 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256](../data-types/int-uint.md) hash value
+- A [UInt256]((../data-types/int-uint.md)) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
 
 **Example**
 Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
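The English documentation hunk above can be sanity-checked with the canonical RIPEMD-160 test vector. Assuming a server built with OpenSSL (registration of `ripeMD160` sits under `#if USE_SSL` two patches back), the documented `hex` wrapping yields:

```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog')) AS digest;

-- ┌─digest───────────────────────────────────┐
-- │ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
-- └──────────────────────────────────────────┘
```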
diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 66d77e66972..b7adcfc1829 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -125,7 +125,8 @@ SELECT hex(sipHash128('foo', '\x01', 3)); ``` ## ripeMD160 -Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md). + +Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки. **Синтаксис** @@ -139,7 +140,7 @@ ripeMD160('input') **Возвращаемое значение** -- [UInt256](../data-types/int-uint.md) хеш-значение +- [UInt256](../data-types/int-uint.md), где 160-битный хеш RIPEMD-160 хранится в первых 20 байтах. Оставшиеся 12 байт заполняются нулями. **Пример** Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой From f89193fa416cc333f549d72bb8ba453907edc951 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 19:12:19 +0000 Subject: [PATCH 081/114] Update version_date.tsv and changelogs after v24.5.5.41-stable --- utils/list-versions/version_date.tsv | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 199c4f822f4..0e25f8d3b62 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,9 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 -v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 -v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 From 4200b3d5cbbfe065073c40f1e122c44189f3554f Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 14:02:25 +0200 Subject: [PATCH 082/114] CI: Stress test fix --- tests/clickhouse-test | 2 +- tests/docker_scripts/stress_runner.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4f9380d6f20..ad6173065fe 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3567,7 +3567,7 @@ if __name__ == "__main__": f"Cannot access the specified directory with queries ({args.queries})", file=sys.stderr, ) - sys.exit(1) + assert False, "No --queries provided" CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace diff --git a/tests/docker_scripts/stress_runner.sh b/tests/docker_scripts/stress_runner.sh index 7666398e10b..039c60c8e4e 100755 --- a/tests/docker_scripts/stress_runner.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -10,8 +10,7 @@ dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -ln -s /repo/tests/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /repo/tests/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib
@@ -266,6 +265,7 @@ fi
 
 start_server
 
+cd /repo/tests/ || exit 1 # clickhouse-test can find queries dir from there
 python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
     && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
     || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
From 69f6ea5083f1686becce4ca9fcf47d1404f2d3ed Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 22:07:02 +0200
Subject: [PATCH 083/114] Update docs/en/sql-reference/functions/hash-functions.md

---
 docs/en/sql-reference/functions/hash-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index cd1c85b5f4c..55126640e34 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -704,7 +704,7 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256]((../data-types/int-uint.md)) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
+- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
 
 **Example**
 Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
From dc862b1411884a462bba8dcf86a474ccbe57e380 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 23:40:18 +0200
Subject: [PATCH 084/114] fix test

---
 tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
index 8ab18f5edfe..60e8a6e9faa 100755
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 1;
 
-SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
 """ 2>&1 | grep -c "INCORRECT_DATA"
From 4c790999eb6ad74e3a8f99c072dcc12c956a63d8 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 23 Aug 2024 02:18:26 +0200
Subject: [PATCH 085/114] CI: Force package_debug build on release branches

---
 .github/workflows/release_branches.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 82826794ea3..ec119b6ff95 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -130,6 +130,7 @@ jobs:
     with:
       build_name: package_debug
       data: ${{ needs.RunConfig.outputs.data }}
+      force: true
   BuilderBinDarwin:
     needs: [RunConfig, BuildDockers]
     if: ${{ !failure() && !cancelled() }}
From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 23 Aug 2024 02:51:27 +0200
Subject: [PATCH 086/114] CI: Make job rerun possible if triggered manually

---
 tests/ci/ci.py       | 7 +++++--
 tests/ci/ci_utils.py | 5 +++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index a9ae078b449..d201b6602f5 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -333,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
         CI.JobNames.BUILD_CHECK,
     ):  # we might want to rerun build report job
         rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name))
-        if rerun_helper.is_already_finished_by_status():
+        if (
+            rerun_helper.is_already_finished_by_status()
+            and not Utils.is_job_triggered_manually()
+        ):
             print("WARNING: Rerunning job with GH status ")
             status = rerun_helper.get_finished_status()
             assert status
@@ -344,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
             skip_status = status.state
 
     # ci cache check
-    if not to_be_skipped and not no_cache:
+    if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually():
         ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update()
         job_config = CI.get_job_config(job_name)
         if ci_cache.is_successful(
diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py
index a4c0977f47c..e8d9e7dc254 100644
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@@ -18,6 +18,7 @@ class Envs:
     )
     S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
     GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
+    GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "")
 
 
 class WithIter(type):
@@ -282,3 +283,7 @@ class Utils:
         ):
             res = res.replace(*r)
         return res
+
+    @staticmethod
+    def is_job_triggered_manually():
+        return "robot" not in Envs.GITHUB_ACTOR
From 60e4bcbbf0b1991b42bcab4b83e55be344e8a659 Mon Sep 17 00:00:00 2001
From: Tanya Bragin
Date: Thu, 22 Aug 2024 20:45:28 -0700
Subject: [PATCH 087/114] Update README.md

Update Raleigh meetup link

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 83a5c05c667..546f08afd3d 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey
 
 * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
-* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9
+* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
 * [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
 
From e5380806653f8d391c6e88664b0096c3c51240f5 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 07:09:03 +0000
Subject: [PATCH 088/114] Update version_date.tsv and changelogs after v24.5.6.45-stable

---
 docs/changelogs/v24.5.6.45-stable.md | 33 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelogs/v24.5.6.45-stable.md

diff --git a/docs/changelogs/v24.5.6.45-stable.md b/docs/changelogs/v24.5.6.45-stable.md
new file mode 100644
index 00000000000..b329ebab27b
--- /dev/null
+++ b/docs/changelogs/v24.5.6.45-stable.md
@@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME
as compared to v24.5.5.78-stable (0138248cb62) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0e25f8d3b62..57a59d7ac49 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,8 +6,8 @@ v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 +v24.5.6.45-stable 2024-08-23 v24.5.5.78-stable 2024-08-05 -v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:37:32 +0000 Subject: [PATCH 089/114] Update version_date.tsv and changelogs after v24.6.4.42-stable --- docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.6.4.42-stable.md diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md new file mode 100644 index 00000000000..29b6ba095af --- /dev/null +++ b/docs/changelogs/v24.6.4.42-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. 
[#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). 
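Of the entries above, the `OFFSET` one ([#68099](https://github.com/ClickHouse/ClickHouse/pull/68099)) is the least obvious: an `ORDER BY` inside a subquery normally looks redundant to the optimizer, but `OFFSET` makes the subquery order-sensitive, because the sort decides which rows get skipped. A hedged sketch of the failure mode (table, data, and the exact `OFFSET` syntax are illustrative assumptions, not taken from the linked issue):

```sql
CREATE TABLE t (n UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t SELECT number FROM numbers(10);

-- With the inner sort preserved, OFFSET skips n = 9, 8, 7, 6, 5 and the
-- outer filter matches nothing, so count() is 0. If the optimizer drops
-- the ORDER BY as "redundant", an arbitrary five rows are skipped and
-- the result becomes nondeterministic.
SELECT count()
FROM
(
    SELECT n
    FROM t
    ORDER BY n DESC
    OFFSET 5 ROWS
)
WHERE n >= 5;
```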
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 0e25f8d3b62..8ce510f110d 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,11 +3,12 @@ v24.8.1.2684-lts 2024-08-21
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
+v24.5.6.45-stable 2024-08-23
 v24.5.5.78-stable 2024-08-05
-v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13

From eec720dab60ea63b033919bbc4c1f6837920a42d Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 08:05:27 +0000
Subject: [PATCH 090/114] Update version_date.tsv and changelogs after v24.7.4.51-stable

---
 docs/changelogs/v24.7.4.51-stable.md | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv | 2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/changelogs/v24.7.4.51-stable.md

diff --git a/docs/changelogs/v24.7.4.51-stable.md b/docs/changelogs/v24.7.4.51-stable.md
new file mode 100644
index 00000000000..a7cf9790383
--- /dev/null
+++ b/docs/changelogs/v24.7.4.51-stable.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransform`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
+* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). 
+* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). +* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 57a59d7ac49..d9674ed2366 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,8 +1,10 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 +v24.7.4.51-stable 2024-08-23 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 +v24.6.4.42-stable 2024-08-23 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 From 6ba686d2510a2d95ab4332560163d0b4600533a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 23 Aug 2024 09:20:40 +0000 Subject: [PATCH 091/114] Split test case and reduce number of random runs to reduce time necessary to run the test --- .../01395_limit_more_cases.reference | 1 - .../0_stateless/01395_limit_more_cases.sh | 24 ++++--------------- .../01395_limit_more_cases_random.reference | 1 + .../01395_limit_more_cases_random.sh | 22 +++++++++++++++++ 4 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/01395_limit_more_cases_random.reference create mode 100755 tests/queries/0_stateless/01395_limit_more_cases_random.sh diff --git a/tests/queries/0_stateless/01395_limit_more_cases.reference b/tests/queries/0_stateless/01395_limit_more_cases.reference index c9d0dd73ab8..d68b987ea19 100644 --- a/tests/queries/0_stateless/01395_limit_more_cases.reference +++ b/tests/queries/0_stateless/01395_limit_more_cases.reference @@ -254,4 +254,3 @@ 15 13 0 0 0 0 0 0 15 14 0 0 0 0 0 0 15 15 0 0 0 0 0 0 -0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases.sh b/tests/queries/0_stateless/01395_limit_more_cases.sh index 177147d2142..9709bd74f26 100755 --- a/tests/queries/0_stateless/01395_limit_more_cases.sh +++ b/tests/queries/0_stateless/01395_limit_more_cases.sh @@ -9,8 +9,11 @@ SIZE=13 for OFFSET in {0..15}; do for LIMIT in {0..15}; do echo "SELECT - $OFFSET, $LIMIT, - count() AS c, min(number) AS first, max(number) AS last, + $OFFSET, + $LIMIT, + count() AS c, + min(number) AS first, + max(number) AS last, throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? 
least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) @@ -18,20 +21,3 @@ for OFFSET in {0..15}; do " done done | $CLICKHOUSE_CLIENT -n --max_block_size 5 - -# Randomized test - -ITERATIONS=1000 -for _ in $(seq $ITERATIONS); do - SIZE=$(($RANDOM % 100)) - OFFSET=$(($RANDOM % 111)) - LIMIT=$(($RANDOM % 111)) - - echo "WITH count() AS c, min(number) AS first, max(number) AS last - SELECT - throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), - throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), - throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) - FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); - " -done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.reference b/tests/queries/0_stateless/01395_limit_more_cases_random.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.sh b/tests/queries/0_stateless/01395_limit_more_cases_random.sh new file mode 100755 index 00000000000..c2f6b060aab --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SIZE=13 +ITERATIONS=300 +for _ in $(seq $ITERATIONS); do + SIZE=$(($RANDOM % 100)) + OFFSET=$(($RANDOM % 111)) + LIMIT=$(($RANDOM % 111)) + + echo "WITH count() AS c, min(number) AS first, max(number) AS last + SELECT + throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), + throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), + throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) + FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); + " +done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq From 8c4329964f597b1eb8139990a41360243f9337f9 Mon Sep 17 00:00:00 2001 From: Maxim Dergousov Date: Fri, 23 Aug 2024 12:50:18 +0300 Subject: [PATCH 092/114] small cosmetic changes in docs --- docs/en/sql-reference/functions/hash-functions.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 55126640e34..908e288cf59 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -695,7 +695,7 @@ Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value. **Syntax** ```sql -ripeMD160('input') +ripeMD160(input) ``` **Parameters** @@ -707,6 +707,7 @@ ripeMD160('input') - A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded. **Example** + Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. 
Query:

From 8cc5d766b5d70f22646e6dc2832f806736c76311 Mon Sep 17 00:00:00 2001
From: Maxim Dergousov
Date: Fri, 23 Aug 2024 12:52:55 +0300
Subject: [PATCH 093/114] small cosmetic changes in docs

---
 docs/ru/sql-reference/functions/hash-functions.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md
index b7adcfc1829..d7b90b09122 100644
--- a/docs/ru/sql-reference/functions/hash-functions.md
+++ b/docs/ru/sql-reference/functions/hash-functions.md
@@ -131,7 +131,7 @@ SELECT hex(sipHash128('foo', '\x01', 3));
 **Синтаксис**
 
 ```sql
-ripeMD160('input')
+ripeMD160(input)
 ```
 
 **Аргументы**
@@ -143,6 +143,7 @@ ripeMD160('input')
 - [UInt256](../data-types/int-uint.md), где 160-битный хеш RIPEMD-160 хранится в первых 20 байтах. Оставшиеся 12 байт заполняются нулями.
 
 **Пример**
+
 Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой
 
 Запрос:

From b0894bffe62722acee2fa5d832ceda9a75754bde Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:01:17 +0200
Subject: [PATCH 094/114] change test file location

---
 .../sample.parquet | Bin
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/{column0=Elizabeth => column0=Elizabeth1}/sample.parquet (100%)

diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
similarity index 100%
rename from tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet
rename to tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet

From 1165ae756d3a6ca1b9b7c7e9be77f1812390c527 Mon Sep 17 00:00:00 2001
From: avogar
Date: Fri, 23 Aug 2024 12:16:16 +0000
Subject: [PATCH 095/114] Make dynamic structure selection more consistent

---
 src/Columns/ColumnDynamic.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index 1f37add9d2d..efb835b2e17 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -1182,12 +1182,13 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
     if (!canAddNewVariants(0, all_variants.size()))
     {
         /// Create list of variants with their sizes and sort it.
-        std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
+        std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
         variants_with_sizes.reserve(all_variants.size());
         for (const auto & variant : all_variants)
         {
-            if (variant->getName() != getSharedVariantTypeName())
-                variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+            auto variant_name = variant->getName();
+            if (variant_name != getSharedVariantTypeName())
+                variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant);
         }
         std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
@@ -1196,14 +1197,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
         result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
         /// Add shared variant.
result_variants.push_back(getSharedVariantDataType());
-        for (const auto & [size, variant] : variants_with_sizes)
+        for (const auto & [size, variant_name, variant_type] : variants_with_sizes)
         {
             /// Add variant to the resulting variants list until we reach max_dynamic_types.
             if (canAddNewVariant(result_variants.size()))
-                result_variants.push_back(variant);
+                result_variants.push_back(variant_type);
             /// Add all remaining variants into shared_variants_statistics until we reach its max size.
             else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
-                new_statistics.shared_variants_statistics[variant->getName()] = size;
+                new_statistics.shared_variants_statistics[variant_name] = size;
             else
                 break;
         }

From 6f5210644b95b41cc9d490d4e117c81bd61a1d06 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 23 Aug 2024 14:43:09 +0200
Subject: [PATCH 096/114] Update src/Columns/ColumnObject.cpp

Co-authored-by: Alexander Gololobov
---
 src/Columns/ColumnObject.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 999c0f6088e..e397b03b69e 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -1045,7 +1045,7 @@ void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColu
 
 bool ColumnObject::structureEquals(const IColumn & rhs) const
 {
-    /// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types.
+    /// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types.
     const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs);
     if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
         return false;

From 2b20b2d4de78acf4fbb08b3f106ebdf410e4587d Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 23 Aug 2024 15:02:43 +0200
Subject: [PATCH 097/114] Update src/Columns/ColumnDynamic.cpp

Co-authored-by: Dmitry Novik
---
 src/Columns/ColumnDynamic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index efb835b2e17..ef6cd7dcea2 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -1181,7 +1181,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
     /// Check if the number of all dynamic types exceeds the limit.
     if (!canAddNewVariants(0, all_variants.size()))
     {
-        /// Create list of variants with their sizes and sort it.
+        /// Create a list of variants with their sizes and names and then sort it.
std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
         variants_with_sizes.reserve(all_variants.size());
         for (const auto & variant : all_variants)

From 5d6b861ff055de0d04e0c574bf2ebb1e51215ace Mon Sep 17 00:00:00 2001
From: avogar
Date: Fri, 23 Aug 2024 13:49:36 +0000
Subject: [PATCH 098/114] Fix index with limit=0

---
 src/Columns/ColumnVariant.cpp | 2 +-
 .../03228_variant_permutation_issue.reference | 4 ++++
 .../03228_variant_permutation_issue.sql | 15 +++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp
index 2fea3eca123..c6511695f5c 100644
--- a/src/Columns/ColumnVariant.cpp
+++ b/src/Columns/ColumnVariant.cpp
@@ -953,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const
 {
     /// If we have only NULLs, index will take no effect, just return resized column.
     if (hasOnlyNulls())
-        return cloneResized(limit);
+        return cloneResized(limit == 0 ? indexes.size(): limit);
 
     /// Optimization when we have only one non empty variant and no NULLs.
     /// In this case local_discriminators column is filled with identical values and offsets column

diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference
index 10688253e15..be9cdedaf07 100644
--- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference
+++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference
@@ -2,3 +2,7 @@
 3 {"foo2":"bar"} 1
 2 {"foo2":"baz"} 2
 3 {"foo2":"bar"} 1
+2 {"foo2":"bar"} 1
+3 {"foo2":"bar"} 1
+2 {"foo2":"baz"} 2
+3 {"foo2":"bar"} 1

diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql
index 088361d6430..81eb2ed69af 100644
--- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql
+++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql
@@ -16,3 +16,18 @@ SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
 
 DROP TABLE test_new_json_type;
 
+CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings allow_nullable_key=1;
+INSERT INTO test_new_json_type format JSONEachRow
+{"id":1,"data":{"foo1":"bar"},"version":1}
+{"id":2,"data":{"foo2":"bar"},"version":1}
+{"id":3,"data":{"foo2":"bar"},"version":1}
+;
+
+SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
+
+INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2;
+
+SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST;
+
+DROP TABLE test_new_json_type;
+

From 61fa4e7a476b3db31c22030470341b131501f3b6 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Fri, 23 Aug 2024 16:38:48 +0200
Subject: [PATCH 099/114] fix logical err of modify statistics

---
 src/Storages/AlterCommands.cpp | 2 +-
 src/Storages/StatisticsDescription.cpp | 6 ++++--
 .../integration/test_manipulate_statistics/test.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index d5780e32db3..67b18217767 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -734,7 +734,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         {
             if (!metadata.columns.has(statistics_column_name))
             {
-                throw
Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name); } } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 63c849e3806..acf600dd6f7 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include @@ -115,8 +114,11 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other) { + /// If the statistics is empty, it's possible that we have not assign a column_name. + if (empty() && column_name == "") + column_name = other.column_name; if (other.column_name != column_name) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", other.column_name, column_name); types_to_desc = other.types_to_desc; data_type = other.data_type; diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index 2541c9b946f..ab5559e18fa 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -6,11 +6,13 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", user_configs=["config/config.xml"], with_zookeeper=True + "node1", user_configs=["config/config.xml"], with_zookeeper=True, + macros={"replica": "a", "shard": "shard1"} ) node2 = cluster.add_instance( - "node2", user_configs=["config/config.xml"], with_zookeeper=True + "node2", user_configs=["config/config.xml"], with_zookeeper=True, + macros={"replica": "b", "shard": "shard1"} ) @@ -183,3 +185,11 @@ def test_replicated_table_ddl(started_cluster): ) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True) + + +def test_replicated_db(started_cluster): + node1.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") + node2.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") + node1.query("CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()") + node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64") + node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest") From 7aabd7d2fd4a03ddea5ef311cf89b2eb7520674c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 23 Aug 2024 15:11:51 +0000 Subject: [PATCH 100/114] Fix resolving dynamic subcolumns from subqueries in analyzer --- src/Analyzer/Resolve/IdentifierResolver.cpp | 2 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 3 +++ src/Analyzer/Resolve/TableExpressionData.h | 1 + .../03228_dynamic_subcolumns_from_subquery.reference | 4 ++++ .../03228_dynamic_subcolumns_from_subquery.sql | 9 +++++++++ 5 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference create mode 100644 tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql diff --git 
a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp
index 14d4acc7c9b..80e7d1e4445 100644
--- a/src/Analyzer/Resolve/IdentifierResolver.cpp
+++ b/src/Analyzer/Resolve/IdentifierResolver.cpp
@@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
             result_column_node = it->second;
         }
         /// Check if it's a dynamic subcolumn
-        else
+        else if (table_expression_data.supports_subcolumns)
         {
             auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name);
             auto jt = table_expression_data.column_name_to_column_node.find(column_name);

diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp
index 004da5ed341..a18c2901a58 100644
--- a/src/Analyzer/Resolve/QueryAnalyzer.cpp
+++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp
@@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
 
         auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
         if (storage_snapshot->storage.supportsSubcolumns())
+        {
             get_column_options.withSubcolumns();
+            table_expression_data.supports_subcolumns = true;
+        }
 
         auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
         table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end());

diff --git a/src/Analyzer/Resolve/TableExpressionData.h b/src/Analyzer/Resolve/TableExpressionData.h
index 18cbfa32366..6770672d0c2 100644
--- a/src/Analyzer/Resolve/TableExpressionData.h
+++ b/src/Analyzer/Resolve/TableExpressionData.h
@@ -36,6 +36,7 @@ struct AnalysisTableExpressionData
     std::string database_name;
     std::string table_name;
     bool should_qualify_columns = true;
+    bool supports_subcolumns = false;
     NamesAndTypes column_names_and_types;
     ColumnNameToColumnNodeMap column_name_to_column_node;
     std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns

diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference
new file mode 100644
index 00000000000..153ad78f694
--- /dev/null
+++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference
@@ -0,0 +1,4 @@
+str
+42
+42
+42

diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql
new file mode 100644
index 00000000000..a10b0cb2809
--- /dev/null
+++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql
@@ -0,0 +1,9 @@
+set allow_experimental_dynamic_type=1;
+set allow_experimental_json_type=1;
+set allow_experimental_analyzer=1;
+
+select d.String from (select 'str'::Dynamic as d);
+select json.a from (select '{"a" : 42}'::JSON as json);
+select json.a from (select '{"a" : 42}'::JSON(a UInt32) as json);
+select json.a.:Int64 from (select materialize('{"a" : 42}')::JSON as json);
+

From 80504e7b9b52fec79a89e2fff5881ca397022107 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 23 Aug 2024 19:07:25 +0000
Subject: [PATCH 101/114] fix test 03228_virtual_column_merge_dist

---
 .../queries/0_stateless/03228_virtual_column_merge_dist.sql | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql
index caf00a2e407..e58c7f38d3b 100644
---
a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -1,3 +1,6 @@ +-- There is a bug in old analyzer with currentDatabase() and distributed query. +SET enable_analyzer = 1; + DROP TABLE IF EXISTS t_local_1; DROP TABLE IF EXISTS t_local_2; DROP TABLE IF EXISTS t_merge; @@ -10,7 +13,7 @@ INSERT INTO t_local_1 VALUES (1); INSERT INTO t_local_2 VALUES (2); CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); -CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); +CREATE TABLE t_distr AS t_local_1 ENGINE = Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); SELECT a, _table FROM t_merge ORDER BY a; SELECT a, _table FROM t_distr ORDER BY a; From a82421719383041a839289093d1882265a068cd1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 23 Aug 2024 20:29:04 +0000 Subject: [PATCH 102/114] Done --- ..._rewrite_sum_column_and_constant.reference | 26 +++++++++---------- ...alyzer_rewrite_sum_column_and_constant.sql | 11 ++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference index 802d920aaef..b41635f014e 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference @@ -1635,21 +1635,21 @@ QUERY id: 0 JOIN TREE TABLE id: 10, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) From test_table; -26.5 +26.875 SELECT sum(2 + float64) From test_table; -26.5 +26.875 SELECT sum(float64 - 2) From test_table; -6.5 +6.875 SELECT sum(2 - float64) From test_table; --6.5 +-6.875 SELECT sum(float64) + 2 * count(float64) From test_table; -26.5 +26.875 SELECT 2 * count(float64) + sum(float64) From test_table; -26.5 +26.875 SELECT sum(float64) - 2 * count(float64) From test_table; -6.5 +6.875 SELECT 2 * count(float64) - sum(float64) From test_table; --6.5 +-6.875 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); QUERY id: 0 PROJECTION COLUMNS @@ -2463,25 +2463,25 @@ QUERY id: 0 JOIN TREE TABLE id: 12, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); QUERY id: 0 PROJECTION COLUMNS diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql index 
5492d061c12..b6fa097abe9 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql @@ -25,11 +25,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. +INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; From 0f265ce33d857a9c7446698629b6517b71b4a71d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 23 Aug 2024 23:13:53 +0200 Subject: [PATCH 103/114] address comments --- src/Interpreters/InterpreterCreateQuery.cpp | 1 - src/Storages/AlterCommands.cpp | 10 ++++------ src/Storages/ColumnsDescription.cpp | 4 ---- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- src/Storages/Statistics/Statistics.cpp | 16 ++++++++-------- src/Storages/Statistics/Statistics.h | 6 ++++-- src/Storages/StatisticsDescription.cpp | 21 +++++---------------- src/Storages/StatisticsDescription.h | 4 ++-- 9 files changed, 26 insertions(+), 42 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 95143031707..467547e6c9e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -700,7 +700,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } - column.statistics.column_name = column.name; /// We assign column name here for better exception error message. 
if (col_decl.statistics_desc)
         {
             if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index 67b18217767..07bc87b0162 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -705,9 +705,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         }
 
         auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-        for (const auto & stats : stats_vec)
+        for (const auto & [stats_column_name, stats] : stats_vec)
         {
-            metadata.columns.modify(stats.column_name,
+            metadata.columns.modify(stats_column_name,
                 [&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
         }
     }
@@ -739,9 +739,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         }
 
         auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-        for (const auto & stats : stats_vec)
+        for (const auto & [stats_column_name, stats] : stats_vec)
        {
-            metadata.columns.modify(stats.column_name,
+            metadata.columns.modify(stats_column_name,
                 [&](ColumnDescription & column) { column.statistics.assign(stats); });
        }
     }
@@ -866,8 +866,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
                 rename_visitor.visit(column_to_modify.default_desc.expression);
             if (column_to_modify.ttl)
                 rename_visitor.visit(column_to_modify.ttl);
-            if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
-                column_to_modify.statistics.column_name = rename_to;
         });
     }
     if (metadata.table_ttl.definition_ast)

diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 0d724245b49..fdc3446aa46 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -209,11 +209,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
             settings = col_ast->settings->as<ASTSetQuery &>().changes;
 
         if (col_ast->statistics_desc)
-        {
             statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
-            /// every column has name `x` here, so we have to set the name manually.
- statistics.column_name = name; - } } else throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ce06adf110c..0d34eb7f630 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -62,7 +62,7 @@ static ColumnsStatistics getStatisticsForColumns( const auto * desc = all_columns.tryGet(column.name); if (desc && !desc->statistics.empty()) { - auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics); + auto statistics = MergeTreeStatisticsFactory::instance().get(*desc); all_statistics.push_back(std::move(statistics)); } } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b9b5333a61c..1119ca324d6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -546,7 +546,7 @@ static std::set getStatisticsToRecalculate(const StorageMet { if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name)) { - stats_to_recalc.insert(stats_factory.get(col_desc.statistics)); + stats_to_recalc.insert(stats_factory.get(col_desc)); } } return stats_to_recalc; @@ -1530,7 +1530,7 @@ private: if (ctx->materialized_statistics.contains(col.name)) { - stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics)); + stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col)); } else { diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fd686c5f0aa..6372c804e0e 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_) { } -ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_) +ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_) + : stats_desc(stats_desc_), column_name(column_name_) { } @@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const const String & ColumnStatistics::columnName() const { - return stats_desc.column_name; + return column_name; } UInt64 ColumnStatistics::rowCount() const @@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st } } -ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const +ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const { - ColumnStatisticsPtr column_stat = std::make_shared(stats); - for (const auto & [type, desc] : stats.types_to_desc) + ColumnStatisticsPtr column_stat = std::make_shared(column_desc.statistics, column_desc.name); + for (const auto & [type, desc] : column_desc.statistics.types_to_desc) { auto it = creators.find(type); if (it == creators.end()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. 
Available types: 'tdigest' 'uniq' and 'count_min'", type);
-        auto stat_ptr = (it->second)(desc, stats.data_type);
+        auto stat_ptr = (it->second)(desc, column_desc.type);
         column_stat->stats[type] = stat_ptr;
     }
     return column_stat;
@@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
     ColumnsStatistics result;
     for (const auto & col : columns)
         if (!col.statistics.empty())
-            result.push_back(get(col.statistics));
+            result.push_back(get(col));
     return result;
 }

diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h
index 2a30c0de315..98666ed73df 100644
--- a/src/Storages/Statistics/Statistics.h
+++ b/src/Storages/Statistics/Statistics.h
@@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
 class ColumnStatistics
 {
 public:
-    explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
+    explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
 
     void serialize(WriteBuffer & buf);
     void deserialize(ReadBuffer & buf);
@@ -73,10 +73,12 @@ public:
 private:
     friend class MergeTreeStatisticsFactory;
     ColumnStatisticsDescription stats_desc;
+    String column_name;
     std::map<StatisticsType, StatisticsPtr> stats;
     UInt64 rows = 0; /// the number of rows in the column
 };

+struct ColumnDescription;
 class ColumnsDescription;
 using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
 using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;

@@ -91,7 +93,7 @@ public:
     using Validator = std::function<void(const SingleStatisticsDescription &, const DataTypePtr &)>;
     using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription &, const DataTypePtr &)>;

-    ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
+    ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
     ColumnsStatistics getMany(const ColumnsDescription & columns) const;

     void registerValidator(StatisticsType type, Validator validator);

diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp
index acf600dd6f7..64634124758 100644
--- a/src/Storages/StatisticsDescription.cpp
+++ b/src/Storages/StatisticsDescription.cpp
@@ -96,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
 {
     chassert(merging_column_type);
 
-    if (column_name.empty())
-        column_name = merging_column_name;
-
     data_type = merging_column_type;
 
     for (const auto & [stats_type, stats_desc]: other.types_to_desc)
     {
         if (!if_not_exists && types_to_desc.contains(stats_type))
         {
-            throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name);
+            throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
         }
         else if (!types_to_desc.contains(stats_type))
             types_to_desc.emplace(stats_type, stats_desc);
@@ -114,12 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
 
 void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
 {
-    /// If the statistics is empty, it's possible that we have not assign a column_name.
-    if (empty() && column_name == "")
-        column_name = other.column_name;
-    if (other.column_name != column_name)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", other.column_name, column_name);
-
     types_to_desc = other.types_to_desc;
     data_type = other.data_type;
 }
@@ -129,7 +120,7 @@ void ColumnStatisticsDescription::clear()
     types_to_desc.clear();
 }
 
-std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
+std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
 {
     const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
     if (!stat_definition_ast)
@@ -147,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
         statistics_types.emplace(stat.type, stat);
     }
 
-    std::vector<ColumnStatisticsDescription> result;
+    std::vector<std::pair<String, ColumnStatisticsDescription>> result;
     result.reserve(stat_definition_ast->columns->children.size());
 
     for (const auto & column_ast : stat_definition_ast->columns->children)
@@ -159,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
             throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
 
         const auto & column = columns.getPhysical(physical_column_name);
-        stats.column_name = column.name;
         stats.data_type = column.type;
         stats.types_to_desc = statistics_types;
-        result.push_back(stats);
+        result.emplace_back(physical_column_name, stats);
     }
 
     if (result.empty())
@@ -177,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
     if (stat_type_list_ast->children.empty())
         throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
     ColumnStatisticsDescription stats;
-    stats.column_name = column.name;
     for (const auto & ast : stat_type_list_ast->children)
     {
         const auto & stat_type = ast->as<ASTFunction &>().name;
 
         SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
         if (stats.types_to_desc.contains(stat.type))
-            throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
         stats.types_to_desc.emplace(stat.type, std::move(stat));
     }
     stats.data_type = data_type;

diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h
index 03b8fb0d583..46927f1418c 100644
--- a/src/Storages/StatisticsDescription.h
+++ b/src/Storages/StatisticsDescription.h
@@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
 
     ASTPtr getAST() const;
 
-    static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
+    /// get a vector of <column name, statistics description> pair
+    static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
     static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
 
     using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
     StatisticsTypeDescMap types_to_desc;
     DataTypePtr data_type;
 };

From 6fb8f2b4ee10a95104bf6f8880471d24d39095dc Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Fri, 23 Aug 2024 23:19:03 +0200
Subject: [PATCH 104/114] fix black

---
 .../test_manipulate_statistics/test.py | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py
index ab5559e18fa..3a1c5ad5b96
100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -6,13 +6,17 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", user_configs=["config/config.xml"], with_zookeeper=True, - macros={"replica": "a", "shard": "shard1"} + "node1", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "a", "shard": "shard1"}, ) node2 = cluster.add_instance( - "node2", user_configs=["config/config.xml"], with_zookeeper=True, - macros={"replica": "b", "shard": "shard1"} + "node2", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "b", "shard": "shard1"}, ) @@ -188,8 +192,14 @@ def test_replicated_table_ddl(started_cluster): def test_replicated_db(started_cluster): - node1.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") - node2.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") - node1.query("CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()") + node1.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node2.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node1.query( + "CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()" + ) node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64") node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest") From 0a35b111ffb34f3d6a8a9e9bfa712b57b722c447 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 23 Aug 2024 20:03:38 +0000 Subject: [PATCH 105/114] fix test 03221_mutation_analyzer_skip_part --- .../03221_mutation_analyzer_skip_part.sh | 46 +++++++++++++++++++ .../03221_mutation_analyzer_skip_part.sql | 21 --------- 2 files changed, 46 insertions(+), 21 deletions(-) create mode 100755 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh delete mode 100644 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh new file mode 100755 index 00000000000..03fd15f54e2 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query " + DROP TABLE IF EXISTS t_mutate_skip_part; + + CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) + ENGINE = MergeTree ORDER BY id PARTITION BY key + SETTINGS min_bytes_for_wide_part = 0; + + INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); + INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); + + SET mutations_sync = 2; + ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; + ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0; +" + +# Mutation query may return before the entry is added to part log. +# So, we may have to retry the flush of logs until all entries are actually flushed. 
+for _ in {1..10}; do + ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + res=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.part_log WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'") + + if [[ $res -eq 4 ]]; then + break + fi + + sleep 2.0 +done + +${CLICKHOUSE_CLIENT} --query " + SYSTEM FLUSH LOGS; + + -- If part is skipped in mutation and hardlinked then read_rows must be 0. + SELECT part_name, read_rows + FROM system.part_log + WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' + ORDER BY part_name; + + DROP TABLE IF EXISTS t_mutate_skip_part; +" diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql deleted file mode 100644 index bf9a10e2af4..00000000000 --- a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql +++ /dev/null @@ -1,21 +0,0 @@ -DROP TABLE IF EXISTS t_mutate_skip_part; - -CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) ENGINE = MergeTree ORDER BY id PARTITION BY key; - -INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); -INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); - -SET mutations_sync = 2; - -ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; -ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0; - -SYSTEM FLUSH LOGS; - --- If part is skipped in mutation and hardlinked then read_rows must be 0. -SELECT part_name, read_rows -FROM system.part_log -WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' -ORDER BY part_name; - -DROP TABLE IF EXISTS t_mutate_skip_part; From 080b8f74be186738813ca9d9e12ed3e327129c33 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Fri, 23 Aug 2024 15:50:56 -0700 Subject: [PATCH 106/114] Update README.md Add Austin meetup --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 546f08afd3d..ba212852ea8 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ Other upcoming meetups * [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 * [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 * [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 +* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17 * [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 ## Recent Recordings From 5fe151529ab58112f8fa8491d2bfff24562ff624 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 24 Aug 2024 07:33:18 +0200 Subject: [PATCH 107/114] fix flacky although that is not actually flacky --- tests/integration/test_manipulate_statistics/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index aff943e4d20..3a1c5ad5b96 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -135,8 +135,8 @@ def test_single_node_normal(started_cluster): def test_replicated_table_ddl(started_cluster): - node1.query("DROP TABLE IF EXISTS test_stat") - node2.query("DROP TABLE IF EXISTS test_stat") + 
+    node2.query("DROP TABLE IF EXISTS test_stat SYNC")
 
     node1.query(
         """
@@ -192,6 +192,8 @@ def test_replicated_table_ddl(started_cluster):
 
 
 def test_replicated_db(started_cluster):
+    node1.query("DROP DATABASE IF EXISTS test SYNC")
+    node2.query("DROP DATABASE IF EXISTS test SYNC")
     node1.query(
         "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
     )

From e2aa953e700bfbabbfe69a5749f4d2806bd3610f Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Sat, 24 Aug 2024 20:45:10 +0800
Subject: [PATCH 108/114] Fix empty tuple in array

---
 src/Functions/array/arrayElement.cpp                           | 3 +++
 tests/queries/0_stateless/03229_empty_tuple_in_array.reference | 1 +
 tests/queries/0_stateless/03229_empty_tuple_in_array.sql       | 1 +
 3 files changed, 5 insertions(+)
 create mode 100644 tests/queries/0_stateless/03229_empty_tuple_in_array.reference
 create mode 100644 tests/queries/0_stateless/03229_empty_tuple_in_array.sql

diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp
index 81f3f97979b..d0b2b49cc1c 100644
--- a/src/Functions/array/arrayElement.cpp
+++ b/src/Functions/array/arrayElement.cpp
@@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
     const auto & tuple_columns = col_nested->getColumns();
     size_t tuple_size = tuple_columns.size();
 
+    if (tuple_size == 0)
+        return ColumnTuple::create(input_rows_count);
+
     const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(
         *typeid_cast<const DataTypeArray &>(*arguments[0].type).getNestedType()).getElements();
 
diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.reference b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference
new file mode 100644
index 00000000000..6a452c185a8
--- /dev/null
+++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference
@@ -0,0 +1 @@
+()
diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.sql b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql
new file mode 100644
index 00000000000..09ba3595a5a
--- /dev/null
+++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql
@@ -0,0 +1 @@
+select [()][0];

From 78c175225b9b4c929ed918e718351c18a166458a Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Thu, 22 Aug 2024 14:50:10 +0000
Subject: [PATCH 109/114] Done

---
 contrib/replxx                  |  2 +-
 src/Client/ReplxxLineReader.cpp | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/contrib/replxx b/contrib/replxx
index 5d04501f93a..5f696c6eb9a 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b
+Subproject commit 5f696c6eb9a88eb9784e8ff1d68bd5f70285dcc5
diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp
index 78ae6c5eb15..37ceb471e5b 100644
--- a/src/Client/ReplxxLineReader.cpp
+++ b/src/Client/ReplxxLineReader.cpp
@@ -299,13 +299,14 @@ ReplxxLineReader::ReplxxLineReader(
     Patterns delimiters_,
     const char word_break_characters_[],
     replxx::Replxx::highlighter_callback_t highlighter_,
-    [[ maybe_unused ]] std::istream & input_stream_,
-    [[ maybe_unused ]] std::ostream & output_stream_,
-    [[ maybe_unused ]] int in_fd_,
-    [[ maybe_unused ]] int out_fd_,
-    [[ maybe_unused ]] int err_fd_
+    std::istream & input_stream_,
+    std::ostream & output_stream_,
+    int in_fd_,
+    int out_fd_,
+    int err_fd_
 )
     : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
+    , rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_)
     , highlighter(std::move(highlighter_))
     , word_break_characters(word_break_characters_)
     , editor(getEditor())
@@ -516,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line)
     rx.history_add(line);
 
     // flush changes to the disk
-    if (!rx.history_save(history_file_path))
+    if (history_file_fd >= 0 && !rx.history_save(history_file_path))
         rx.print("Saving history failed: %s\n", errnoToString().c_str());
 
     if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))

From 01523cce2a4ba21c9855ab4eb1398986cf66c64b Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Fri, 23 Aug 2024 12:14:40 +0000
Subject: [PATCH 110/114] Bump replxx

---
 contrib/replxx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/replxx b/contrib/replxx
index 5f696c6eb9a..711c18e7f4d 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit 5f696c6eb9a88eb9784e8ff1d68bd5f70285dcc5
+Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb

From 385c8127cf4b7018a964705d0bdcaf17bdf494e4 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Fri, 23 Aug 2024 17:25:34 +0200
Subject: [PATCH 111/114] Fix FreeBSD build

---
 cmake/freebsd/toolchain-x86_64.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake
index 4635880b4a6..4d814693b39 100644
--- a/cmake/freebsd/toolchain-x86_64.cmake
+++ b/cmake/freebsd/toolchain-x86_64.cmake
@@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
 
+# dprintf is used in a patched version of replxx
+add_compile_definitions(_WITH_DPRINTF)
+
 set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake

From d16388000497251856f62e8ac67ade58c29f8e85 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Sun, 25 Aug 2024 00:11:31 -0400
Subject: [PATCH 112/114] process possible SSL error on connection reset

---
 base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
index 4873d259ae5..14c877b30af 100644
--- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
@@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
     while (mustRetry(rc, remaining_time));
     if (rc <= 0)
     {
+        // At this stage we still can have last not yet recieved SSL message containing SSL error
+        // so make a read to force SSL to process possible SSL error
+        if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
+        {
+            char c = 0;
+            SSL_read(_pSSL, &c, 1);
+        }
+
         rc = handleError(rc);
         if (rc == 0) throw SSLConnectionUnexpectedlyClosedException();
     }

From f7cc3e9c59947af5b753b154f5b1b59d26fe67d4 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Sun, 25 Aug 2024 00:13:12 -0400
Subject: [PATCH 113/114] postpone SSL handshake

---
 src/Client/Connection.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index e89bd7a2bf5..da6e5baa3ad 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
             /// work we need to pass host name separately. It will be send into TLS Hello packet to let
             /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
             static_cast<Poco::Net::SecureStreamSocket *>(socket.get())->setPeerHostName(host);
+            /// we want to postpone SSL handshake until first read or write operation
+            /// so any errors during negotiation would be properly processed
+            static_cast<Poco::Net::SecureStreamSocket *>(socket.get())->setLazyHandshake(true);
 #else
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support.");
 #endif

From f38f95a144fa8840bc19647af3be9aa83a505196 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Sun, 25 Aug 2024 14:26:21 +0200
Subject: [PATCH 114/114] Update base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp

---
 base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
index 14c877b30af..eaf267d8a8b 100644
--- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
@@ -311,7 +311,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
     while (mustRetry(rc, remaining_time));
     if (rc <= 0)
     {
-        // At this stage we still can have last not yet recieved SSL message containing SSL error
+        // At this stage we still can have last not yet received SSL message containing SSL error
         // so make a read to force SSL to process possible SSL error
         if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
         {